diff options
Diffstat (limited to 'drivers/edac/sb_edac.c')
-rw-r--r-- | drivers/edac/sb_edac.c | 475 |
1 files changed, 207 insertions, 268 deletions
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index a203536d90d..f3b1f9fafa4 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -58,7 +58,7 @@ static int probed; /* * FIXME: For now, let's order by device function, as it makes - * easier for driver's development proccess. This table should be + * easier for driver's development process. This table should be * moved to pci_id.h when submitted upstream */ #define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */ @@ -314,8 +314,6 @@ struct sbridge_pvt { struct sbridge_info info; struct sbridge_channel channel[NUM_CHANNELS]; - int csrow_map[NUM_CHANNELS][MAX_DIMMS]; - /* Memory type detection */ bool is_mirrored, is_lockstep, is_close_pg; @@ -375,7 +373,7 @@ static DEFINE_PCI_DEVICE_TABLE(sbridge_pci_tbl) = { /**************************************************************************** - Anciliary status routines + Ancillary status routines ****************************************************************************/ static inline int numrank(u32 mtr) @@ -383,8 +381,8 @@ static inline int numrank(u32 mtr) int ranks = (1 << RANK_CNT_BITS(mtr)); if (ranks > 4) { - debugf0("Invalid number of ranks: %d (max = 4) raw value = %x (%04x)", - ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr); + edac_dbg(0, "Invalid number of ranks: %d (max = 4) raw value = %x (%04x)\n", + ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr); return -EINVAL; } @@ -396,8 +394,8 @@ static inline int numrow(u32 mtr) int rows = (RANK_WIDTH_BITS(mtr) + 12); if (rows < 13 || rows > 18) { - debugf0("Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)", - rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr); + edac_dbg(0, "Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)\n", + rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr); return -EINVAL; } @@ -409,8 +407,8 @@ static inline int numcol(u32 mtr) int cols = (COL_WIDTH_BITS(mtr) + 10); if (cols > 12) { - debugf0("Invalid number of 
cols: %d (max = 4) raw value = %x (%04x)", - cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr); + edac_dbg(0, "Invalid number of cols: %d (max = 4) raw value = %x (%04x)\n", + cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr); return -EINVAL; } @@ -477,8 +475,8 @@ static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot, if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot && PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) { - debugf1("Associated %02x.%02x.%d with %p\n", - bus, slot, func, sbridge_dev->pdev[i]); + edac_dbg(1, "Associated %02x.%02x.%d with %p\n", + bus, slot, func, sbridge_dev->pdev[i]); return sbridge_dev->pdev[i]; } } @@ -487,29 +485,14 @@ static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot, } /** - * sbridge_get_active_channels() - gets the number of channels and csrows + * check_if_ecc_is_active() - Checks if ECC is active * bus: Device bus - * @channels: Number of channels that will be returned - * @csrows: Number of csrows found - * - * Since EDAC core needs to know in advance the number of available channels - * and csrows, in order to allocate memory for csrows/channels, it is needed - * to run two similar steps. At the first step, implemented on this function, - * it checks the number of csrows/channels present at one socket, identified - * by the associated PCI bus. - * this is used in order to properly allocate the size of mci components. - * Note: one csrow is one dimm. */ -static int sbridge_get_active_channels(const u8 bus, unsigned *channels, - unsigned *csrows) +static int check_if_ecc_is_active(const u8 bus) { struct pci_dev *pdev = NULL; - int i, j; u32 mcmtr; - *channels = 0; - *csrows = 0; - pdev = get_pdev_slot_func(bus, 15, 0); if (!pdev) { sbridge_printk(KERN_ERR, "Couldn't find PCI device " @@ -523,41 +506,14 @@ static int sbridge_get_active_channels(const u8 bus, unsigned *channels, sbridge_printk(KERN_ERR, "ECC is disabled. 
Aborting\n"); return -ENODEV; } - - for (i = 0; i < NUM_CHANNELS; i++) { - u32 mtr; - - /* Device 15 functions 2 - 5 */ - pdev = get_pdev_slot_func(bus, 15, 2 + i); - if (!pdev) { - sbridge_printk(KERN_ERR, "Couldn't find PCI device " - "%2x.%02d.%d!!!\n", - bus, 15, 2 + i); - return -ENODEV; - } - (*channels)++; - - for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { - pci_read_config_dword(pdev, mtr_regs[j], &mtr); - debugf1("Bus#%02x channel #%d MTR%d = %x\n", bus, i, j, mtr); - if (IS_DIMM_PRESENT(mtr)) - (*csrows)++; - } - } - - debugf0("Number of active channels: %d, number of active dimms: %d\n", - *channels, *csrows); - return 0; } -static int get_dimm_config(const struct mem_ctl_info *mci) +static int get_dimm_config(struct mem_ctl_info *mci) { struct sbridge_pvt *pvt = mci->pvt_info; - struct csrow_info *csr; + struct dimm_info *dimm; int i, j, banks, ranks, rows, cols, size, npages; - int csrow = 0; - unsigned long last_page = 0; u32 reg; enum edac_type mode; enum mem_type mtype; @@ -567,45 +523,45 @@ static int get_dimm_config(const struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_br, SAD_CONTROL, &reg); pvt->sbridge_dev->node_id = NODE_ID(reg); - debugf0("mc#%d: Node ID: %d, source ID: %d\n", - pvt->sbridge_dev->mc, - pvt->sbridge_dev->node_id, - pvt->sbridge_dev->source_id); + edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n", + pvt->sbridge_dev->mc, + pvt->sbridge_dev->node_id, + pvt->sbridge_dev->source_id); pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg); if (IS_MIRROR_ENABLED(reg)) { - debugf0("Memory mirror is enabled\n"); + edac_dbg(0, "Memory mirror is enabled\n"); pvt->is_mirrored = true; } else { - debugf0("Memory mirror is disabled\n"); + edac_dbg(0, "Memory mirror is disabled\n"); pvt->is_mirrored = false; } pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr); if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) { - debugf0("Lockstep is enabled\n"); + edac_dbg(0, "Lockstep is enabled\n"); mode = EDAC_S8ECD8ED; pvt->is_lockstep = true; } else 
{ - debugf0("Lockstep is disabled\n"); + edac_dbg(0, "Lockstep is disabled\n"); mode = EDAC_S4ECD4ED; pvt->is_lockstep = false; } if (IS_CLOSE_PG(pvt->info.mcmtr)) { - debugf0("address map is on closed page mode\n"); + edac_dbg(0, "address map is on closed page mode\n"); pvt->is_close_pg = true; } else { - debugf0("address map is on open page mode\n"); + edac_dbg(0, "address map is on open page mode\n"); pvt->is_close_pg = false; } - pci_read_config_dword(pvt->pci_ta, RANK_CFG_A, &reg); + pci_read_config_dword(pvt->pci_ddrio, RANK_CFG_A, &reg); if (IS_RDIMM_ENABLED(reg)) { /* FIXME: Can also be LRDIMM */ - debugf0("Memory is registered\n"); + edac_dbg(0, "Memory is registered\n"); mtype = MEM_RDDR3; } else { - debugf0("Memory is unregistered\n"); + edac_dbg(0, "Memory is unregistered\n"); mtype = MEM_DDR3; } @@ -616,9 +572,11 @@ static int get_dimm_config(const struct mem_ctl_info *mci) u32 mtr; for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, + i, j, 0); pci_read_config_dword(pvt->pci_tad[i], mtr_regs[j], &mtr); - debugf4("Channel #%d MTR%d = %x\n", i, j, mtr); + edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr); if (IS_DIMM_PRESENT(mtr)) { pvt->channel[i].dimms++; @@ -630,33 +588,19 @@ static int get_dimm_config(const struct mem_ctl_info *mci) size = (rows * cols * banks * ranks) >> (20 - 3); npages = MiB_TO_PAGES(size); - debugf0("mc#%d: channel %d, dimm %d, %d Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", - pvt->sbridge_dev->mc, i, j, - size, npages, - banks, ranks, rows, cols); - csr = &mci->csrows[csrow]; - - csr->first_page = last_page; - csr->last_page = last_page + npages - 1; - csr->page_mask = 0UL; /* Unused */ - csr->nr_pages = npages; - csr->grain = 32; - csr->csrow_idx = csrow; - csr->dtype = (banks == 8) ? 
DEV_X8 : DEV_X4; - csr->ce_count = 0; - csr->ue_count = 0; - csr->mtype = mtype; - csr->edac_mode = mode; - csr->nr_channels = 1; - csr->channels[0].chan_idx = i; - csr->channels[0].ce_count = 0; - pvt->csrow_map[i][j] = csrow; - snprintf(csr->channels[0].label, - sizeof(csr->channels[0].label), + edac_dbg(0, "mc#%d: channel %d, dimm %d, %d Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + pvt->sbridge_dev->mc, i, j, + size, npages, + banks, ranks, rows, cols); + + dimm->nr_pages = npages; + dimm->grain = 32; + dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4; + dimm->mtype = mtype; + dimm->edac_mode = mode; + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_Channel#%u_DIMM#%u", pvt->sbridge_dev->source_id, i, j); - last_page += npages; - csrow++; } } } @@ -685,8 +629,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = (1 + pvt->tolm) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); - debugf0("TOLM: %u.%03u GB (0x%016Lx)\n", - mb, kb, (u64)pvt->tolm); + edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm); /* Address range is already 45:25 */ pci_read_config_dword(pvt->pci_sad1, TOHM, @@ -695,8 +638,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = (1 + pvt->tohm) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); - debugf0("TOHM: %u.%03u GB (0x%016Lx)", - mb, kb, (u64)pvt->tohm); + edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm); /* * Step 2) Get SAD range and SAD Interleave list @@ -719,13 +661,13 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = (limit + 1) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); - debugf0("SAD#%d %s up to %u.%03u GB (0x%016Lx) %s reg=0x%08x\n", - n_sads, - get_dram_attr(reg), - mb, kb, - ((u64)tmp_mb) << 20L, - INTERLEAVE_MODE(reg) ? 
"Interleave: 8:6" : "Interleave: [8:6]XOR[18:16]", - reg); + edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n", + n_sads, + get_dram_attr(reg), + mb, kb, + ((u64)tmp_mb) << 20L, + INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]", + reg); prv = limit; pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads], @@ -735,8 +677,8 @@ static void get_memory_layout(const struct mem_ctl_info *mci) if (j > 0 && sad_interl == sad_pkg(reg, j)) break; - debugf0("SAD#%d, interleave #%d: %d\n", - n_sads, j, sad_pkg(reg, j)); + edac_dbg(0, "SAD#%d, interleave #%d: %d\n", + n_sads, j, sad_pkg(reg, j)); } } @@ -753,16 +695,16 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = (limit + 1) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); - debugf0("TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", - n_tads, mb, kb, - ((u64)tmp_mb) << 20L, - (u32)TAD_SOCK(reg), - (u32)TAD_CH(reg), - (u32)TAD_TGT0(reg), - (u32)TAD_TGT1(reg), - (u32)TAD_TGT2(reg), - (u32)TAD_TGT3(reg), - reg); + edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", + n_tads, mb, kb, + ((u64)tmp_mb) << 20L, + (u32)TAD_SOCK(reg), + (u32)TAD_CH(reg), + (u32)TAD_TGT0(reg), + (u32)TAD_TGT1(reg), + (u32)TAD_TGT2(reg), + (u32)TAD_TGT3(reg), + reg); prv = limit; } @@ -778,11 +720,11 @@ static void get_memory_layout(const struct mem_ctl_info *mci) &reg); tmp_mb = TAD_OFFSET(reg) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); - debugf0("TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n", - i, j, - mb, kb, - ((u64)tmp_mb) << 20L, - reg); + edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n", + i, j, + mb, kb, + ((u64)tmp_mb) << 20L, + reg); } } @@ -803,12 +745,12 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = RIR_LIMIT(reg) >> 20; rir_way = 1 << RIR_WAY(reg); mb = div_u64_rem(tmp_mb, 1000, 
&kb); - debugf0("CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n", - i, j, - mb, kb, - ((u64)tmp_mb) << 20L, - rir_way, - reg); + edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n", + i, j, + mb, kb, + ((u64)tmp_mb) << 20L, + rir_way, + reg); for (k = 0; k < rir_way; k++) { pci_read_config_dword(pvt->pci_tad[i], @@ -817,12 +759,12 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = RIR_OFFSET(reg) << 6; mb = div_u64_rem(tmp_mb, 1000, &kb); - debugf0("CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", - i, j, k, - mb, kb, - ((u64)tmp_mb) << 20L, - (u32)RIR_RNK_TGT(reg), - reg); + edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", + i, j, k, + mb, kb, + ((u64)tmp_mb) << 20L, + (u32)RIR_RNK_TGT(reg), + reg); } } } @@ -844,11 +786,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci, u8 *socket, long *channel_mask, u8 *rank, - char *area_type) + char **area_type, char *msg) { struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; - char msg[256]; int n_rir, n_sads, n_tads, sad_way, sck_xch; int sad_interl, idx, base_ch; int interleave_mode; @@ -870,12 +811,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci, */ if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) { sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } if (addr >= (u64)pvt->tohm) { sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } @@ -892,7 +831,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, limit = SAD_LIMIT(reg); if (limit <= prv) { sprintf(msg, "Can't discover the memory socket"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } if (addr <= limit) @@ -901,10 +839,9 @@ static int get_memory_error_data(struct mem_ctl_info *mci, } if (n_sads == MAX_SAD) { sprintf(msg, "Can't 
discover the memory socket"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } - area_type = get_dram_attr(reg); + *area_type = get_dram_attr(reg); interleave_mode = INTERLEAVE_MODE(reg); pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads], @@ -914,16 +851,16 @@ static int get_memory_error_data(struct mem_ctl_info *mci, if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way)) break; sad_interleave[sad_way] = sad_pkg(reg, sad_way); - debugf0("SAD interleave #%d: %d\n", - sad_way, sad_interleave[sad_way]); + edac_dbg(0, "SAD interleave #%d: %d\n", + sad_way, sad_interleave[sad_way]); } - debugf0("mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n", - pvt->sbridge_dev->mc, - n_sads, - addr, - limit, - sad_way + 7, - interleave_mode ? "" : "XOR[18:16]"); + edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n", + pvt->sbridge_dev->mc, + n_sads, + addr, + limit, + sad_way + 7, + interleave_mode ? "" : "XOR[18:16]"); if (interleave_mode) idx = ((addr >> 6) ^ (addr >> 16)) & 7; else @@ -942,12 +879,11 @@ static int get_memory_error_data(struct mem_ctl_info *mci, break; default: sprintf(msg, "Can't discover socket interleave"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } *socket = sad_interleave[idx]; - debugf0("SAD interleave index: %d (wayness %d) = CPU socket %d\n", - idx, sad_way, *socket); + edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n", + idx, sad_way, *socket); /* * Move to the proper node structure, in order to access the @@ -957,7 +893,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, if (!new_mci) { sprintf(msg, "Struct for socket #%u wasn't initialized", *socket); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } mci = new_mci; @@ -973,7 +908,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, limit = TAD_LIMIT(reg); if (limit <= prv) { sprintf(msg, "Can't discover the memory channel"); - 
edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } if (addr <= limit) @@ -1013,7 +947,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, break; default: sprintf(msg, "Can't discover the TAD target"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } *channel_mask = 1 << base_ch; @@ -1027,7 +960,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, break; default: sprintf(msg, "Invalid mirror set. Can't decode addr"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } } else @@ -1038,16 +970,16 @@ static int get_memory_error_data(struct mem_ctl_info *mci, offset = TAD_OFFSET(tad_offset); - debugf0("TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n", - n_tads, - addr, - limit, - (u32)TAD_SOCK(reg), - ch_way, - offset, - idx, - base_ch, - *channel_mask); + edac_dbg(0, "TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n", + n_tads, + addr, + limit, + (u32)TAD_SOCK(reg), + ch_way, + offset, + idx, + base_ch, + *channel_mask); /* Calculate channel address */ /* Remove the TAD offset */ @@ -1055,7 +987,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, if (offset > addr) { sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!", offset, addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } addr -= offset; @@ -1084,18 +1015,17 @@ static int get_memory_error_data(struct mem_ctl_info *mci, limit = RIR_LIMIT(reg); mb = div_u64_rem(limit >> 20, 1000, &kb); - debugf0("RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n", - n_rir, - mb, kb, - limit, - 1 << RIR_WAY(reg)); + edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n", + n_rir, + mb, kb, + limit, + 1 << RIR_WAY(reg)); if (ch_addr <= limit) break; } if (n_rir == MAX_RIR_RANGES) { sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx", 
ch_addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } rir_way = RIR_WAY(reg); @@ -1110,12 +1040,12 @@ static int get_memory_error_data(struct mem_ctl_info *mci, &reg); *rank = RIR_RNK_TGT(reg); - debugf0("RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", - n_rir, - ch_addr, - limit, - rir_way, - idx); + edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", + n_rir, + ch_addr, + limit, + rir_way, + idx); return 0; } @@ -1132,14 +1062,14 @@ static void sbridge_put_devices(struct sbridge_dev *sbridge_dev) { int i; - debugf0(__FILE__ ": %s()\n", __func__); + edac_dbg(0, "\n"); for (i = 0; i < sbridge_dev->n_devs; i++) { struct pci_dev *pdev = sbridge_dev->pdev[i]; if (!pdev) continue; - debugf0("Removing dev %02x:%02x.%d\n", - pdev->bus->number, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + edac_dbg(0, "Removing dev %02x:%02x.%d\n", + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); pci_dev_put(pdev); } } @@ -1245,10 +1175,9 @@ static int sbridge_get_onedevice(struct pci_dev **prev, return -ENODEV; } - debugf0("Detected dev %02x:%d.%d PCI ID %04x:%04x\n", - bus, dev_descr->dev, - dev_descr->func, - PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + edac_dbg(0, "Detected dev %02x:%d.%d PCI ID %04x:%04x\n", + bus, dev_descr->dev, dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); /* * As stated on drivers/pci/search.c, the reference count for @@ -1365,10 +1294,10 @@ static int mci_bind_devs(struct mem_ctl_info *mci, goto error; } - debugf0("Associated PCI %02x.%02d.%d with dev = %p\n", - sbridge_dev->bus, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), - pdev); + edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n", + sbridge_dev->bus, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + pdev); } /* Check if everything were registered */ @@ -1409,7 +1338,8 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, { struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt 
= mci->pvt_info; - char *type, *optype, *msg, *recoverable_msg; + enum hw_event_mc_err_type tp_event; + char *type, *optype, msg[256]; bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); @@ -1421,16 +1351,24 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, u32 optypenum = GET_BITFIELD(m->status, 4, 6); long channel_mask, first_channel; u8 rank, socket; - int csrow, rc, dimm; - char *area_type = "Unknown"; - - if (ripv) - type = "NON_FATAL"; - else - type = "FATAL"; + int rc, dimm; + char *area_type = NULL; + + if (uncorrected_error) { + if (ripv) { + type = "FATAL"; + tp_event = HW_EVENT_ERR_FATAL; + } else { + type = "NON_FATAL"; + tp_event = HW_EVENT_ERR_UNCORRECTED; + } + } else { + type = "CORRECTED"; + tp_event = HW_EVENT_ERR_CORRECTED; + } /* - * According with Table 15-9 of the Intel Archictecture spec vol 3A, + * According with Table 15-9 of the Intel Architecture spec vol 3A, * memory errors should fit in this mask: * 000f 0000 1mmm cccc (binary) * where: @@ -1445,19 +1383,19 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, } else { switch (optypenum) { case 0: - optype = "generic undef request"; + optype = "generic undef request error"; break; case 1: - optype = "memory read"; + optype = "memory read error"; break; case 2: - optype = "memory write"; + optype = "memory write error"; break; case 3: - optype = "addr/cmd"; + optype = "addr/cmd error"; break; case 4: - optype = "memory scrubbing"; + optype = "memory scrubbing error"; break; default: optype = "reserved"; @@ -1466,13 +1404,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, } rc = get_memory_error_data(mci, m->addr, &socket, - &channel_mask, &rank, area_type); + &channel_mask, &rank, &area_type, msg); if (rc < 0) - return; + goto err_parsing; new_mci = get_mci_for_node_id(socket); if (!new_mci) { - edac_mc_handle_ce_no_info(mci, "Error: socket 
got corrupted!"); - return; + strcpy(msg, "Error: socket got corrupted!"); + goto err_parsing; } mci = new_mci; pvt = mci->pvt_info; @@ -1486,45 +1424,38 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, else dimm = 2; - csrow = pvt->csrow_map[first_channel][dimm]; - - if (uncorrected_error && recoverable) - recoverable_msg = " recoverable"; - else - recoverable_msg = ""; /* - * FIXME: What should we do with "channel" information on mcelog? - * Probably, we can just discard it, as the channel information - * comes from the get_memory_error_data() address decoding + * FIXME: On some memory configurations (mirror, lockstep), the + * Memory Controller can't point the error to a single DIMM. The + * EDAC core should be handling the channel mask, in order to point + * to the group of dimm's where the error may be happening. */ - msg = kasprintf(GFP_ATOMIC, - "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), " - "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n", - core_err_cnt, - area_type, - optype, - type, - recoverable_msg, - overflow ? "OVERFLOW" : "", - m->cpu, - mscod, errcode, - channel, /* 1111b means not specified */ - (long long) m->addr, - socket, - first_channel, /* This is the real channel on SB */ - channel_mask, - rank); - - debugf0("%s", msg); + snprintf(msg, sizeof(msg), + "%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) ? 
" recoverable" : "", + area_type, + mscod, errcode, + socket, + channel_mask, + rank); + + edac_dbg(0, "%s\n", msg); + + /* FIXME: need support for channel mask */ /* Call the helper to output message */ - if (uncorrected_error) - edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg); - else - edac_mc_handle_fbd_ce(mci, csrow, 0, msg); + edac_mc_handle_error(tp_event, mci, core_err_cnt, + m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, + channel, dimm, -1, + optype, msg); + return; +err_parsing: + edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, + -1, -1, -1, + msg, ""); - kfree(msg); } /* @@ -1657,8 +1588,7 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) struct sbridge_pvt *pvt; if (unlikely(!mci || !mci->pvt_info)) { - debugf0("MC: " __FILE__ ": %s(): dev = %p\n", - __func__, &sbridge_dev->pdev[0]->dev); + edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev); sbridge_printk(KERN_ERR, "Couldn't find mci handler\n"); return; @@ -1666,15 +1596,13 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) pvt = mci->pvt_info; - debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", - __func__, mci, &sbridge_dev->pdev[0]->dev); - - mce_unregister_decode_chain(&sbridge_mce_dec); + edac_dbg(0, "MC: mci = %p, dev = %p\n", + mci, &sbridge_dev->pdev[0]->dev); /* Remove MC sysfs nodes */ - edac_mc_del_mc(mci->dev); + edac_mc_del_mc(mci->pdev); - debugf1("%s: free mci struct\n", mci->ctl_name); + edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); kfree(mci->ctl_name); edac_mc_free(mci); sbridge_dev->mci = NULL; @@ -1683,21 +1611,30 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) { struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct sbridge_pvt *pvt; - int rc, channels, csrows; + int rc; /* Check the number of active and not disabled channels */ - rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows); + rc = 
check_if_ecc_is_active(sbridge_dev->bus); if (unlikely(rc < 0)) return rc; /* allocate a new MC control structure */ - mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc); + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = NUM_CHANNELS; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = MAX_DIMMS; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers, + sizeof(*pvt)); + if (unlikely(!mci)) return -ENOMEM; - debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", - __func__, mci, &sbridge_dev->pdev[0]->dev); + edac_dbg(0, "MC: mci = %p, dev = %p\n", + mci, &sbridge_dev->pdev[0]->dev); pvt = mci->pvt_info; memset(pvt, 0, sizeof(*pvt)); @@ -1728,17 +1665,15 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) get_memory_layout(mci); /* record ptr to the generic device */ - mci->dev = &sbridge_dev->pdev[0]->dev; + mci->pdev = &sbridge_dev->pdev[0]->dev; /* add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { - debugf0("MC: " __FILE__ - ": %s(): failed edac_mc_add_mc()\n", __func__); + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); rc = -EINVAL; goto fail0; } - mce_register_decode_chain(&sbridge_mce_dec); return 0; fail0: @@ -1781,7 +1716,8 @@ static int __devinit sbridge_probe(struct pci_dev *pdev, mc = 0; list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) { - debugf0("Registering MC#%d (%d of %d)\n", mc, mc + 1, num_mc); + edac_dbg(0, "Registering MC#%d (%d of %d)\n", + mc, mc + 1, num_mc); sbridge_dev->mc = mc++; rc = sbridge_register_mci(sbridge_dev); if (unlikely(rc < 0)) @@ -1811,7 +1747,7 @@ static void __devexit sbridge_remove(struct pci_dev *pdev) { struct sbridge_dev *sbridge_dev; - debugf0(__FILE__ ": %s()\n", __func__); + edac_dbg(0, "\n"); /* * we have a trouble here: pdev value for removal will be wrong, since @@ -1860,15 +1796,17 @@ static int __init sbridge_init(void) { int 
pci_rc; - debugf2("MC: " __FILE__ ": %s()\n", __func__); + edac_dbg(2, "\n"); /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); pci_rc = pci_register_driver(&sbridge_driver); - if (pci_rc >= 0) + if (pci_rc >= 0) { + mce_register_decode_chain(&sbridge_mce_dec); return 0; + } sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", pci_rc); @@ -1882,8 +1820,9 @@ static int __init sbridge_init(void) */ static void __exit sbridge_exit(void) { - debugf2("MC: " __FILE__ ": %s()\n", __func__); + edac_dbg(2, "\n"); pci_unregister_driver(&sbridge_driver); + mce_unregister_decode_chain(&sbridge_mce_dec); } module_init(sbridge_init); |