summaryrefslogtreecommitdiffstats
path: root/drivers/edac/sb_edac.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac/sb_edac.c')
-rw-r--r--drivers/edac/sb_edac.c475
1 files changed, 207 insertions, 268 deletions
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index a203536d90d..f3b1f9fafa4 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -58,7 +58,7 @@ static int probed;
/*
* FIXME: For now, let's order by device function, as it makes
- * easier for driver's development proccess. This table should be
+ * easier for driver's development process. This table should be
* moved to pci_id.h when submitted upstream
*/
#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */
@@ -314,8 +314,6 @@ struct sbridge_pvt {
struct sbridge_info info;
struct sbridge_channel channel[NUM_CHANNELS];
- int csrow_map[NUM_CHANNELS][MAX_DIMMS];
-
/* Memory type detection */
bool is_mirrored, is_lockstep, is_close_pg;
@@ -375,7 +373,7 @@ static DEFINE_PCI_DEVICE_TABLE(sbridge_pci_tbl) = {
/****************************************************************************
- Anciliary status routines
+ Ancillary status routines
****************************************************************************/
static inline int numrank(u32 mtr)
@@ -383,8 +381,8 @@ static inline int numrank(u32 mtr)
int ranks = (1 << RANK_CNT_BITS(mtr));
if (ranks > 4) {
- debugf0("Invalid number of ranks: %d (max = 4) raw value = %x (%04x)",
- ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr);
+ edac_dbg(0, "Invalid number of ranks: %d (max = 4) raw value = %x (%04x)\n",
+ ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr);
return -EINVAL;
}
@@ -396,8 +394,8 @@ static inline int numrow(u32 mtr)
int rows = (RANK_WIDTH_BITS(mtr) + 12);
if (rows < 13 || rows > 18) {
- debugf0("Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)",
- rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr);
+ edac_dbg(0, "Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)\n",
+ rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr);
return -EINVAL;
}
@@ -409,8 +407,8 @@ static inline int numcol(u32 mtr)
int cols = (COL_WIDTH_BITS(mtr) + 10);
if (cols > 12) {
- debugf0("Invalid number of cols: %d (max = 4) raw value = %x (%04x)",
- cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr);
+ edac_dbg(0, "Invalid number of cols: %d (max = 4) raw value = %x (%04x)\n",
+ cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr);
return -EINVAL;
}
@@ -477,8 +475,8 @@ static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot &&
PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) {
- debugf1("Associated %02x.%02x.%d with %p\n",
- bus, slot, func, sbridge_dev->pdev[i]);
+ edac_dbg(1, "Associated %02x.%02x.%d with %p\n",
+ bus, slot, func, sbridge_dev->pdev[i]);
return sbridge_dev->pdev[i];
}
}
@@ -487,29 +485,14 @@ static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
}
/**
- * sbridge_get_active_channels() - gets the number of channels and csrows
+ * check_if_ecc_is_active() - Checks if ECC is active
* bus: Device bus
- * @channels: Number of channels that will be returned
- * @csrows: Number of csrows found
- *
- * Since EDAC core needs to know in advance the number of available channels
- * and csrows, in order to allocate memory for csrows/channels, it is needed
- * to run two similar steps. At the first step, implemented on this function,
- * it checks the number of csrows/channels present at one socket, identified
- * by the associated PCI bus.
- * this is used in order to properly allocate the size of mci components.
- * Note: one csrow is one dimm.
*/
-static int sbridge_get_active_channels(const u8 bus, unsigned *channels,
- unsigned *csrows)
+static int check_if_ecc_is_active(const u8 bus)
{
struct pci_dev *pdev = NULL;
- int i, j;
u32 mcmtr;
- *channels = 0;
- *csrows = 0;
-
pdev = get_pdev_slot_func(bus, 15, 0);
if (!pdev) {
sbridge_printk(KERN_ERR, "Couldn't find PCI device "
@@ -523,41 +506,14 @@ static int sbridge_get_active_channels(const u8 bus, unsigned *channels,
sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
return -ENODEV;
}
-
- for (i = 0; i < NUM_CHANNELS; i++) {
- u32 mtr;
-
- /* Device 15 functions 2 - 5 */
- pdev = get_pdev_slot_func(bus, 15, 2 + i);
- if (!pdev) {
- sbridge_printk(KERN_ERR, "Couldn't find PCI device "
- "%2x.%02d.%d!!!\n",
- bus, 15, 2 + i);
- return -ENODEV;
- }
- (*channels)++;
-
- for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
- pci_read_config_dword(pdev, mtr_regs[j], &mtr);
- debugf1("Bus#%02x channel #%d MTR%d = %x\n", bus, i, j, mtr);
- if (IS_DIMM_PRESENT(mtr))
- (*csrows)++;
- }
- }
-
- debugf0("Number of active channels: %d, number of active dimms: %d\n",
- *channels, *csrows);
-
return 0;
}
-static int get_dimm_config(const struct mem_ctl_info *mci)
+static int get_dimm_config(struct mem_ctl_info *mci)
{
struct sbridge_pvt *pvt = mci->pvt_info;
- struct csrow_info *csr;
+ struct dimm_info *dimm;
int i, j, banks, ranks, rows, cols, size, npages;
- int csrow = 0;
- unsigned long last_page = 0;
u32 reg;
enum edac_type mode;
enum mem_type mtype;
@@ -567,45 +523,45 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_br, SAD_CONTROL, &reg);
pvt->sbridge_dev->node_id = NODE_ID(reg);
- debugf0("mc#%d: Node ID: %d, source ID: %d\n",
- pvt->sbridge_dev->mc,
- pvt->sbridge_dev->node_id,
- pvt->sbridge_dev->source_id);
+ edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
+ pvt->sbridge_dev->mc,
+ pvt->sbridge_dev->node_id,
+ pvt->sbridge_dev->source_id);
pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
if (IS_MIRROR_ENABLED(reg)) {
- debugf0("Memory mirror is enabled\n");
+ edac_dbg(0, "Memory mirror is enabled\n");
pvt->is_mirrored = true;
} else {
- debugf0("Memory mirror is disabled\n");
+ edac_dbg(0, "Memory mirror is disabled\n");
pvt->is_mirrored = false;
}
pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
- debugf0("Lockstep is enabled\n");
+ edac_dbg(0, "Lockstep is enabled\n");
mode = EDAC_S8ECD8ED;
pvt->is_lockstep = true;
} else {
- debugf0("Lockstep is disabled\n");
+ edac_dbg(0, "Lockstep is disabled\n");
mode = EDAC_S4ECD4ED;
pvt->is_lockstep = false;
}
if (IS_CLOSE_PG(pvt->info.mcmtr)) {
- debugf0("address map is on closed page mode\n");
+ edac_dbg(0, "address map is on closed page mode\n");
pvt->is_close_pg = true;
} else {
- debugf0("address map is on open page mode\n");
+ edac_dbg(0, "address map is on open page mode\n");
pvt->is_close_pg = false;
}
- pci_read_config_dword(pvt->pci_ta, RANK_CFG_A, &reg);
+ pci_read_config_dword(pvt->pci_ddrio, RANK_CFG_A, &reg);
if (IS_RDIMM_ENABLED(reg)) {
/* FIXME: Can also be LRDIMM */
- debugf0("Memory is registered\n");
+ edac_dbg(0, "Memory is registered\n");
mtype = MEM_RDDR3;
} else {
- debugf0("Memory is unregistered\n");
+ edac_dbg(0, "Memory is unregistered\n");
mtype = MEM_DDR3;
}
@@ -616,9 +572,11 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
u32 mtr;
for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
+ dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+ i, j, 0);
pci_read_config_dword(pvt->pci_tad[i],
mtr_regs[j], &mtr);
- debugf4("Channel #%d MTR%d = %x\n", i, j, mtr);
+ edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr);
if (IS_DIMM_PRESENT(mtr)) {
pvt->channel[i].dimms++;
@@ -630,33 +588,19 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
size = (rows * cols * banks * ranks) >> (20 - 3);
npages = MiB_TO_PAGES(size);
- debugf0("mc#%d: channel %d, dimm %d, %d Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
- pvt->sbridge_dev->mc, i, j,
- size, npages,
- banks, ranks, rows, cols);
- csr = &mci->csrows[csrow];
-
- csr->first_page = last_page;
- csr->last_page = last_page + npages - 1;
- csr->page_mask = 0UL; /* Unused */
- csr->nr_pages = npages;
- csr->grain = 32;
- csr->csrow_idx = csrow;
- csr->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
- csr->ce_count = 0;
- csr->ue_count = 0;
- csr->mtype = mtype;
- csr->edac_mode = mode;
- csr->nr_channels = 1;
- csr->channels[0].chan_idx = i;
- csr->channels[0].ce_count = 0;
- pvt->csrow_map[i][j] = csrow;
- snprintf(csr->channels[0].label,
- sizeof(csr->channels[0].label),
+ edac_dbg(0, "mc#%d: channel %d, dimm %d, %d Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
+ pvt->sbridge_dev->mc, i, j,
+ size, npages,
+ banks, ranks, rows, cols);
+
+ dimm->nr_pages = npages;
+ dimm->grain = 32;
+ dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
+ dimm->mtype = mtype;
+ dimm->edac_mode = mode;
+ snprintf(dimm->label, sizeof(dimm->label),
"CPU_SrcID#%u_Channel#%u_DIMM#%u",
pvt->sbridge_dev->source_id, i, j);
- last_page += npages;
- csrow++;
}
}
}
@@ -685,8 +629,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
tmp_mb = (1 + pvt->tolm) >> 20;
mb = div_u64_rem(tmp_mb, 1000, &kb);
- debugf0("TOLM: %u.%03u GB (0x%016Lx)\n",
- mb, kb, (u64)pvt->tolm);
+ edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm);
/* Address range is already 45:25 */
pci_read_config_dword(pvt->pci_sad1, TOHM,
@@ -695,8 +638,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
tmp_mb = (1 + pvt->tohm) >> 20;
mb = div_u64_rem(tmp_mb, 1000, &kb);
- debugf0("TOHM: %u.%03u GB (0x%016Lx)",
- mb, kb, (u64)pvt->tohm);
+ edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm);
/*
* Step 2) Get SAD range and SAD Interleave list
@@ -719,13 +661,13 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
tmp_mb = (limit + 1) >> 20;
mb = div_u64_rem(tmp_mb, 1000, &kb);
- debugf0("SAD#%d %s up to %u.%03u GB (0x%016Lx) %s reg=0x%08x\n",
- n_sads,
- get_dram_attr(reg),
- mb, kb,
- ((u64)tmp_mb) << 20L,
- INTERLEAVE_MODE(reg) ? "Interleave: 8:6" : "Interleave: [8:6]XOR[18:16]",
- reg);
+ edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n",
+ n_sads,
+ get_dram_attr(reg),
+ mb, kb,
+ ((u64)tmp_mb) << 20L,
+ INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]",
+ reg);
prv = limit;
pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
@@ -735,8 +677,8 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
if (j > 0 && sad_interl == sad_pkg(reg, j))
break;
- debugf0("SAD#%d, interleave #%d: %d\n",
- n_sads, j, sad_pkg(reg, j));
+ edac_dbg(0, "SAD#%d, interleave #%d: %d\n",
+ n_sads, j, sad_pkg(reg, j));
}
}
@@ -753,16 +695,16 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
tmp_mb = (limit + 1) >> 20;
mb = div_u64_rem(tmp_mb, 1000, &kb);
- debugf0("TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
- n_tads, mb, kb,
- ((u64)tmp_mb) << 20L,
- (u32)TAD_SOCK(reg),
- (u32)TAD_CH(reg),
- (u32)TAD_TGT0(reg),
- (u32)TAD_TGT1(reg),
- (u32)TAD_TGT2(reg),
- (u32)TAD_TGT3(reg),
- reg);
+ edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
+ n_tads, mb, kb,
+ ((u64)tmp_mb) << 20L,
+ (u32)TAD_SOCK(reg),
+ (u32)TAD_CH(reg),
+ (u32)TAD_TGT0(reg),
+ (u32)TAD_TGT1(reg),
+ (u32)TAD_TGT2(reg),
+ (u32)TAD_TGT3(reg),
+ reg);
prv = limit;
}
@@ -778,11 +720,11 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
&reg);
tmp_mb = TAD_OFFSET(reg) >> 20;
mb = div_u64_rem(tmp_mb, 1000, &kb);
- debugf0("TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n",
- i, j,
- mb, kb,
- ((u64)tmp_mb) << 20L,
- reg);
+ edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n",
+ i, j,
+ mb, kb,
+ ((u64)tmp_mb) << 20L,
+ reg);
}
}
@@ -803,12 +745,12 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
tmp_mb = RIR_LIMIT(reg) >> 20;
rir_way = 1 << RIR_WAY(reg);
mb = div_u64_rem(tmp_mb, 1000, &kb);
- debugf0("CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n",
- i, j,
- mb, kb,
- ((u64)tmp_mb) << 20L,
- rir_way,
- reg);
+ edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n",
+ i, j,
+ mb, kb,
+ ((u64)tmp_mb) << 20L,
+ rir_way,
+ reg);
for (k = 0; k < rir_way; k++) {
pci_read_config_dword(pvt->pci_tad[i],
@@ -817,12 +759,12 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
tmp_mb = RIR_OFFSET(reg) << 6;
mb = div_u64_rem(tmp_mb, 1000, &kb);
- debugf0("CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
- i, j, k,
- mb, kb,
- ((u64)tmp_mb) << 20L,
- (u32)RIR_RNK_TGT(reg),
- reg);
+ edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
+ i, j, k,
+ mb, kb,
+ ((u64)tmp_mb) << 20L,
+ (u32)RIR_RNK_TGT(reg),
+ reg);
}
}
}
@@ -844,11 +786,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
u8 *socket,
long *channel_mask,
u8 *rank,
- char *area_type)
+ char **area_type, char *msg)
{
struct mem_ctl_info *new_mci;
struct sbridge_pvt *pvt = mci->pvt_info;
- char msg[256];
int n_rir, n_sads, n_tads, sad_way, sck_xch;
int sad_interl, idx, base_ch;
int interleave_mode;
@@ -870,12 +811,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
*/
if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
if (addr >= (u64)pvt->tohm) {
sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
@@ -892,7 +831,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
limit = SAD_LIMIT(reg);
if (limit <= prv) {
sprintf(msg, "Can't discover the memory socket");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
if (addr <= limit)
@@ -901,10 +839,9 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
}
if (n_sads == MAX_SAD) {
sprintf(msg, "Can't discover the memory socket");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
- area_type = get_dram_attr(reg);
+ *area_type = get_dram_attr(reg);
interleave_mode = INTERLEAVE_MODE(reg);
pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
@@ -914,16 +851,16 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way))
break;
sad_interleave[sad_way] = sad_pkg(reg, sad_way);
- debugf0("SAD interleave #%d: %d\n",
- sad_way, sad_interleave[sad_way]);
+ edac_dbg(0, "SAD interleave #%d: %d\n",
+ sad_way, sad_interleave[sad_way]);
}
- debugf0("mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n",
- pvt->sbridge_dev->mc,
- n_sads,
- addr,
- limit,
- sad_way + 7,
- interleave_mode ? "" : "XOR[18:16]");
+ edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n",
+ pvt->sbridge_dev->mc,
+ n_sads,
+ addr,
+ limit,
+ sad_way + 7,
+ interleave_mode ? "" : "XOR[18:16]");
if (interleave_mode)
idx = ((addr >> 6) ^ (addr >> 16)) & 7;
else
@@ -942,12 +879,11 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
break;
default:
sprintf(msg, "Can't discover socket interleave");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
*socket = sad_interleave[idx];
- debugf0("SAD interleave index: %d (wayness %d) = CPU socket %d\n",
- idx, sad_way, *socket);
+ edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n",
+ idx, sad_way, *socket);
/*
* Move to the proper node structure, in order to access the
@@ -957,7 +893,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
if (!new_mci) {
sprintf(msg, "Struct for socket #%u wasn't initialized",
*socket);
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
mci = new_mci;
@@ -973,7 +908,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
limit = TAD_LIMIT(reg);
if (limit <= prv) {
sprintf(msg, "Can't discover the memory channel");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
if (addr <= limit)
@@ -1013,7 +947,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
break;
default:
sprintf(msg, "Can't discover the TAD target");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
*channel_mask = 1 << base_ch;
@@ -1027,7 +960,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
break;
default:
sprintf(msg, "Invalid mirror set. Can't decode addr");
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
} else
@@ -1038,16 +970,16 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
offset = TAD_OFFSET(tad_offset);
- debugf0("TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n",
- n_tads,
- addr,
- limit,
- (u32)TAD_SOCK(reg),
- ch_way,
- offset,
- idx,
- base_ch,
- *channel_mask);
+ edac_dbg(0, "TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n",
+ n_tads,
+ addr,
+ limit,
+ (u32)TAD_SOCK(reg),
+ ch_way,
+ offset,
+ idx,
+ base_ch,
+ *channel_mask);
/* Calculate channel address */
/* Remove the TAD offset */
@@ -1055,7 +987,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
if (offset > addr) {
sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
offset, addr);
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
addr -= offset;
@@ -1084,18 +1015,17 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
limit = RIR_LIMIT(reg);
mb = div_u64_rem(limit >> 20, 1000, &kb);
- debugf0("RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
- n_rir,
- mb, kb,
- limit,
- 1 << RIR_WAY(reg));
+ edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
+ n_rir,
+ mb, kb,
+ limit,
+ 1 << RIR_WAY(reg));
if (ch_addr <= limit)
break;
}
if (n_rir == MAX_RIR_RANGES) {
sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
ch_addr);
- edac_mc_handle_ce_no_info(mci, msg);
return -EINVAL;
}
rir_way = RIR_WAY(reg);
@@ -1110,12 +1040,12 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
&reg);
*rank = RIR_RNK_TGT(reg);
- debugf0("RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
- n_rir,
- ch_addr,
- limit,
- rir_way,
- idx);
+ edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
+ n_rir,
+ ch_addr,
+ limit,
+ rir_way,
+ idx);
return 0;
}
@@ -1132,14 +1062,14 @@ static void sbridge_put_devices(struct sbridge_dev *sbridge_dev)
{
int i;
- debugf0(__FILE__ ": %s()\n", __func__);
+ edac_dbg(0, "\n");
for (i = 0; i < sbridge_dev->n_devs; i++) {
struct pci_dev *pdev = sbridge_dev->pdev[i];
if (!pdev)
continue;
- debugf0("Removing dev %02x:%02x.%d\n",
- pdev->bus->number,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ edac_dbg(0, "Removing dev %02x:%02x.%d\n",
+ pdev->bus->number,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
pci_dev_put(pdev);
}
}
@@ -1245,10 +1175,9 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
return -ENODEV;
}
- debugf0("Detected dev %02x:%d.%d PCI ID %04x:%04x\n",
- bus, dev_descr->dev,
- dev_descr->func,
- PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
+ edac_dbg(0, "Detected dev %02x:%d.%d PCI ID %04x:%04x\n",
+ bus, dev_descr->dev, dev_descr->func,
+ PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
/*
* As stated on drivers/pci/search.c, the reference count for
@@ -1365,10 +1294,10 @@ static int mci_bind_devs(struct mem_ctl_info *mci,
goto error;
}
- debugf0("Associated PCI %02x.%02d.%d with dev = %p\n",
- sbridge_dev->bus,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
- pdev);
+ edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n",
+ sbridge_dev->bus,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+ pdev);
}
/* Check if everything were registered */
@@ -1409,7 +1338,8 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
{
struct mem_ctl_info *new_mci;
struct sbridge_pvt *pvt = mci->pvt_info;
- char *type, *optype, *msg, *recoverable_msg;
+ enum hw_event_mc_err_type tp_event;
+ char *type, *optype, msg[256];
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -1421,16 +1351,24 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
u32 optypenum = GET_BITFIELD(m->status, 4, 6);
long channel_mask, first_channel;
u8 rank, socket;
- int csrow, rc, dimm;
- char *area_type = "Unknown";
-
- if (ripv)
- type = "NON_FATAL";
- else
- type = "FATAL";
+ int rc, dimm;
+ char *area_type = NULL;
+
+ if (uncorrected_error) {
+ if (ripv) {
+ type = "FATAL";
+ tp_event = HW_EVENT_ERR_FATAL;
+ } else {
+ type = "NON_FATAL";
+ tp_event = HW_EVENT_ERR_UNCORRECTED;
+ }
+ } else {
+ type = "CORRECTED";
+ tp_event = HW_EVENT_ERR_CORRECTED;
+ }
/*
- * According with Table 15-9 of the Intel Archictecture spec vol 3A,
+ * According with Table 15-9 of the Intel Architecture spec vol 3A,
* memory errors should fit in this mask:
* 000f 0000 1mmm cccc (binary)
* where:
@@ -1445,19 +1383,19 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
} else {
switch (optypenum) {
case 0:
- optype = "generic undef request";
+ optype = "generic undef request error";
break;
case 1:
- optype = "memory read";
+ optype = "memory read error";
break;
case 2:
- optype = "memory write";
+ optype = "memory write error";
break;
case 3:
- optype = "addr/cmd";
+ optype = "addr/cmd error";
break;
case 4:
- optype = "memory scrubbing";
+ optype = "memory scrubbing error";
break;
default:
optype = "reserved";
@@ -1466,13 +1404,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
}
rc = get_memory_error_data(mci, m->addr, &socket,
- &channel_mask, &rank, area_type);
+ &channel_mask, &rank, &area_type, msg);
if (rc < 0)
- return;
+ goto err_parsing;
new_mci = get_mci_for_node_id(socket);
if (!new_mci) {
- edac_mc_handle_ce_no_info(mci, "Error: socket got corrupted!");
- return;
+ strcpy(msg, "Error: socket got corrupted!");
+ goto err_parsing;
}
mci = new_mci;
pvt = mci->pvt_info;
@@ -1486,45 +1424,38 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
else
dimm = 2;
- csrow = pvt->csrow_map[first_channel][dimm];
-
- if (uncorrected_error && recoverable)
- recoverable_msg = " recoverable";
- else
- recoverable_msg = "";
/*
- * FIXME: What should we do with "channel" information on mcelog?
- * Probably, we can just discard it, as the channel information
- * comes from the get_memory_error_data() address decoding
+ * FIXME: On some memory configurations (mirror, lockstep), the
+ * Memory Controller can't point the error to a single DIMM. The
+ * EDAC core should be handling the channel mask, in order to point
+ * to the group of dimm's where the error may be happening.
*/
- msg = kasprintf(GFP_ATOMIC,
- "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), "
- "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n",
- core_err_cnt,
- area_type,
- optype,
- type,
- recoverable_msg,
- overflow ? "OVERFLOW" : "",
- m->cpu,
- mscod, errcode,
- channel, /* 1111b means not specified */
- (long long) m->addr,
- socket,
- first_channel, /* This is the real channel on SB */
- channel_mask,
- rank);
-
- debugf0("%s", msg);
+ snprintf(msg, sizeof(msg),
+ "%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d",
+ overflow ? " OVERFLOW" : "",
+ (uncorrected_error && recoverable) ? " recoverable" : "",
+ area_type,
+ mscod, errcode,
+ socket,
+ channel_mask,
+ rank);
+
+ edac_dbg(0, "%s\n", msg);
+
+ /* FIXME: need support for channel mask */
/* Call the helper to output message */
- if (uncorrected_error)
- edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg);
- else
- edac_mc_handle_fbd_ce(mci, csrow, 0, msg);
+ edac_mc_handle_error(tp_event, mci, core_err_cnt,
+ m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
+ channel, dimm, -1,
+ optype, msg);
+ return;
+err_parsing:
+ edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
+ -1, -1, -1,
+ msg, "");
- kfree(msg);
}
/*
@@ -1657,8 +1588,7 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
struct sbridge_pvt *pvt;
if (unlikely(!mci || !mci->pvt_info)) {
- debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
- __func__, &sbridge_dev->pdev[0]->dev);
+ edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev);
sbridge_printk(KERN_ERR, "Couldn't find mci handler\n");
return;
@@ -1666,15 +1596,13 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
pvt = mci->pvt_info;
- debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
- __func__, mci, &sbridge_dev->pdev[0]->dev);
-
- mce_unregister_decode_chain(&sbridge_mce_dec);
+ edac_dbg(0, "MC: mci = %p, dev = %p\n",
+ mci, &sbridge_dev->pdev[0]->dev);
/* Remove MC sysfs nodes */
- edac_mc_del_mc(mci->dev);
+ edac_mc_del_mc(mci->pdev);
- debugf1("%s: free mci struct\n", mci->ctl_name);
+ edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
kfree(mci->ctl_name);
edac_mc_free(mci);
sbridge_dev->mci = NULL;
@@ -1683,21 +1611,30 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
{
struct mem_ctl_info *mci;
+ struct edac_mc_layer layers[2];
struct sbridge_pvt *pvt;
- int rc, channels, csrows;
+ int rc;
/* Check the number of active and not disabled channels */
- rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows);
+ rc = check_if_ecc_is_active(sbridge_dev->bus);
if (unlikely(rc < 0))
return rc;
/* allocate a new MC control structure */
- mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc);
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = NUM_CHANNELS;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_SLOT;
+ layers[1].size = MAX_DIMMS;
+ layers[1].is_virt_csrow = true;
+ mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
+ sizeof(*pvt));
+
if (unlikely(!mci))
return -ENOMEM;
- debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
- __func__, mci, &sbridge_dev->pdev[0]->dev);
+ edac_dbg(0, "MC: mci = %p, dev = %p\n",
+ mci, &sbridge_dev->pdev[0]->dev);
pvt = mci->pvt_info;
memset(pvt, 0, sizeof(*pvt));
@@ -1728,17 +1665,15 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
get_memory_layout(mci);
/* record ptr to the generic device */
- mci->dev = &sbridge_dev->pdev[0]->dev;
+ mci->pdev = &sbridge_dev->pdev[0]->dev;
/* add this new MC control structure to EDAC's list of MCs */
if (unlikely(edac_mc_add_mc(mci))) {
- debugf0("MC: " __FILE__
- ": %s(): failed edac_mc_add_mc()\n", __func__);
+ edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
rc = -EINVAL;
goto fail0;
}
- mce_register_decode_chain(&sbridge_mce_dec);
return 0;
fail0:
@@ -1781,7 +1716,8 @@ static int __devinit sbridge_probe(struct pci_dev *pdev,
mc = 0;
list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
- debugf0("Registering MC#%d (%d of %d)\n", mc, mc + 1, num_mc);
+ edac_dbg(0, "Registering MC#%d (%d of %d)\n",
+ mc, mc + 1, num_mc);
sbridge_dev->mc = mc++;
rc = sbridge_register_mci(sbridge_dev);
if (unlikely(rc < 0))
@@ -1811,7 +1747,7 @@ static void __devexit sbridge_remove(struct pci_dev *pdev)
{
struct sbridge_dev *sbridge_dev;
- debugf0(__FILE__ ": %s()\n", __func__);
+ edac_dbg(0, "\n");
/*
* we have a trouble here: pdev value for removal will be wrong, since
@@ -1860,15 +1796,17 @@ static int __init sbridge_init(void)
{
int pci_rc;
- debugf2("MC: " __FILE__ ": %s()\n", __func__);
+ edac_dbg(2, "\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
pci_rc = pci_register_driver(&sbridge_driver);
- if (pci_rc >= 0)
+ if (pci_rc >= 0) {
+ mce_register_decode_chain(&sbridge_mce_dec);
return 0;
+ }
sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
pci_rc);
@@ -1882,8 +1820,9 @@ static int __init sbridge_init(void)
*/
static void __exit sbridge_exit(void)
{
- debugf2("MC: " __FILE__ ": %s()\n", __func__);
+ edac_dbg(2, "\n");
pci_unregister_driver(&sbridge_driver);
+ mce_unregister_decode_chain(&sbridge_mce_dec);
}
module_init(sbridge_init);