From 926311fd7dabcd284a1e8a87a3e2bb5f929c0c60 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 11 Jan 2010 20:58:21 +0100 Subject: amd64_edac: Ensure index stays within bounds in amd64_get_scrub_rate Add a missing iterator variable thus fixing the conditional of the for-loop in amd64_get_scrub_rate(). Signed-off-by: Roel Kluin Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/edac') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c5facd951dd..000dc67b85b 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -197,7 +197,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw) edac_printk(KERN_DEBUG, EDAC_MC, "pci-read, sdram scrub control value: %d \n", scrubval); - for (i = 0; ARRAY_SIZE(scrubrates); i++) { + for (i = 0; i < ARRAY_SIZE(scrubrates); i++) { if (scrubrates[i].scrubval == scrubval) { *bw = scrubrates[i].bandwidth; status = 0; -- cgit v1.2.3-70-g09d2 From 118f3e1afd5534c15f9701f33514186cfc841a27 Mon Sep 17 00:00:00 2001 From: Tamas Vincze Date: Fri, 15 Jan 2010 17:01:10 -0800 Subject: edac: i5000_edac critical fix panic out of bounds EDAC MC0: INTERNAL ERROR: channel-b out of range (4 >= 4) Kernel panic - not syncing: EDAC MC0: Uncorrected Error (XEN) Domain 0 crashed: 'noreboot' set - not rebooting. This happens because FERR_NF_FBD bit 28 is not updated on i5000. Due to that, both bits 28 and 29 may be equal to one, returning channel = 3. As this value is invalid, EDAC core generates the panic. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=14568 Signed-off-by: Tamas Vincze Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Doug Thompson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/edac/i5000_edac.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/edac') diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 77a9579d716..adc10a2ac5f 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -577,7 +577,13 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, debugf0("\tUncorrected bits= 0x%x\n", ue_errors); branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd); - channel = branch; + + /* + * According with i5000 datasheet, bit 28 has no significance + * for errors M4Err-M12Err and M17Err-M21Err, on FERR_NF_FBD + */ + channel = branch & 2; + bank = NREC_BANK(info->nrecmema); rank = NREC_RANK(info->nrecmema); rdwr = NREC_RDWR(info->nrecmema); -- cgit v1.2.3-70-g09d2 From cab4d27764d5a8654212b3e96eb0ae793aec5b94 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 11 Feb 2010 17:15:57 +0100 Subject: amd64_edac: Do not falsely trigger kerneloops An unfortunate "WARNING" in the message amd64_edac dumps when the system doesn't support DRAM ECC or ECC checking is not enabled in the BIOS used to trigger kerneloops which qualified the message as an OOPS thus misleading the users. See, e.g. https://bugs.launchpad.net/ubuntu/+source/linux/+bug/422536 http://bugzilla.kernel.org/show_bug.cgi?id=15238 Downgrade the message level to KERN_NOTICE and fix the formulation. Cc: stable@kernel.org # .32.x Signed-off-by: Borislav Petkov Acked-by: Doug Thompson --- drivers/edac/amd64_edac.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/edac') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 000dc67b85b..3391e6739d0 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2658,10 +2658,11 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) * the memory system completely. A command line option allows to force-enable * hardware ECC later in amd64_enable_ecc_error_reporting(). */ -static const char *ecc_warning = - "WARNING: ECC is disabled by BIOS. Module will NOT be loaded.\n" - " Either Enable ECC in the BIOS, or set 'ecc_enable_override'.\n" - " Also, use of the override can cause unknown side effects.\n"; +static const char *ecc_msg = + "ECC disabled in the BIOS or no ECC capability, module will not load.\n" + " Either enable ECC checking or force module loading by setting " + "'ecc_enable_override'.\n" + " (Note that use of the override may cause unknown side effects.)\n"; static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) { @@ -2673,7 +2674,7 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); if (!ecc_enabled) - amd64_printk(KERN_WARNING, "This node reports that Memory ECC " + amd64_printk(KERN_NOTICE, "This node reports that Memory ECC " "is currently disabled, set F3x%x[22] (%s).\n", K8_NBCFG, pci_name(pvt->misc_f3_ctl)); else @@ -2681,13 +2682,13 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id); if (!nb_mce_en) - amd64_printk(KERN_WARNING, "NB MCE bank disabled, set MSR " + amd64_printk(KERN_NOTICE, "NB MCE bank disabled, set MSR " "0x%08x[4] on node %d to enable.\n", MSR_IA32_MCG_CTL, pvt->mc_node_id); if (!ecc_enabled || !nb_mce_en) { if (!ecc_enable_override) { - amd64_printk(KERN_WARNING, "%s", ecc_warning); + amd64_printk(KERN_NOTICE, "%s", ecc_msg); return -ENODEV; } ecc_enable_override = 0; -- cgit v1.2.3-70-g09d2 From cff9279e4e8d6ff80a640dd6977c8f76aa01e1f8 Mon Sep 17 00:00:00 2001 From: Peter Tyser Date: Wed, 10 Feb 2010 13:56:36 -0800 Subject: edac: mpc85xx fix bad page calculation Commit b4846251727a38a7f248e41308c060995371dd05 ("edac: mpc85xx add mpc83xx support") accidentally broke how a chip select's first and last page addresses are calculated. The page addresses are being shifted too far right by PAGE_SHIFT. This results in errors such as: EDAC MPC85xx MC1: Err addr: 0x003075c0 EDAC MPC85xx MC1: PFN: 0x00000307 EDAC MPC85xx MC1: PFN out of range! EDAC MC1: INTERNAL ERROR: row out of range (4 >= 4) EDAC MC1: CE - no information available: INTERNAL ERROR The vaule of PAGE_SHIFT is already being taken into consideration during the calculation of the 'start' and 'end' variables, thus it is not necessary to account for it again when setting a chip select's first and last page address. Signed-off-by: Peter Tyser Signed-off-by: Doug Thompson Cc: Ira W. Snyder Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/edac/mpc85xx_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/edac') diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index cf27402af97..e24a87fe3b9 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -804,8 +804,8 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci) end <<= (24 - PAGE_SHIFT); end |= (1 << (24 - PAGE_SHIFT)) - 1; - csrow->first_page = start >> PAGE_SHIFT; - csrow->last_page = end >> PAGE_SHIFT; + csrow->first_page = start; + csrow->last_page = end; csrow->nr_pages = end + 1 - start; csrow->grain = 8; csrow->mtype = mtype; -- cgit v1.2.3-70-g09d2 From f8c63345b498a8590e8e87a5990a36cdf89636df Mon Sep 17 00:00:00 2001 From: Peter Tyser Date: Wed, 10 Feb 2010 13:56:37 -0800 Subject: edac: mpc85xx fix build regression by removing unused debug code Some unused, unsupported debug code existed in the mpc85xx EDAC driver that resulted in a build failure when CONFIG_EDAC_DEBUG was defined: drivers/edac/mpc85xx_edac.c: In function 'mpc85xx_mc_err_probe': drivers/edac/mpc85xx_edac.c:1031: error: implicit declaration of function 'edac_mc_register_mcidev_debug' drivers/edac/mpc85xx_edac.c:1031: error: 'debug_attr' undeclared (first use in this function) drivers/edac/mpc85xx_edac.c:1031: error: (Each undeclared identifier is reported only once drivers/edac/mpc85xx_edac.c:1031: error: for each function it appears in.) Signed-off-by: Peter Tyser Signed-off-by: Doug Thompson Signed-off-by: Linus Torvalds --- drivers/edac/mpc85xx_edac.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/edac') diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index e24a87fe3b9..ecd5928d711 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -892,10 +892,6 @@ static int __devinit mpc85xx_mc_err_probe(struct of_device *op, mpc85xx_init_csrows(mci); -#ifdef CONFIG_EDAC_DEBUG - edac_mc_register_mcidev_debug((struct attribute **)debug_attr); -#endif - /* store the original error disable bits */ orig_ddr_err_disable = in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE); -- cgit v1.2.3-70-g09d2