From 505422517d3f126bb939439e9d15dece94e11d2c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 11 Dec 2009 18:14:40 +0100 Subject: x86, msr: Add support for non-contiguous cpumasks The current rd/wrmsr_on_cpus helpers assume that the supplied cpumasks are contiguous. However, there are machines out there like some K8 multinode Opterons which have a non-contiguous core enumeration on each node (e.g. cores 0,2 on node 0 instead of 0,1), see http://www.gossamer-threads.com/lists/linux/kernel/1160268. This patch fixes out-of-bounds writes (see URL above) by adding per-CPU msr structs which are used on the respective cores. Additionally, two helpers, msrs_{alloc,free}, are provided for use by the callers of the MSR accessors. Cc: H. Peter Anvin Cc: Mauro Carvalho Chehab Cc: Aristeu Rozanski Cc: Randy Dunlap Cc: Doug Thompson Signed-off-by: Borislav Petkov LKML-Reference: <20091211171440.GD31998@aftab> Signed-off-by: H. Peter Anvin --- drivers/edac/amd64_edac.c | 46 +++++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) (limited to 'drivers/edac/amd64_edac.c') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5fdd6daa40e..df5b68433f3 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -13,6 +13,8 @@ module_param(report_gart_errors, int, 0644); static int ecc_enable_override; module_param(ecc_enable_override, int, 0644); +static struct msr *msrs; + /* Lookup table for all possible MC control instances */ struct amd64_pvt; static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES]; @@ -2495,8 +2497,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, int nid) static bool amd64_nb_mce_bank_enabled_on_node(int nid) { cpumask_var_t mask; - struct msr *msrs; - int cpu, nbe, idx = 0; + int cpu, nbe; bool ret = false; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { @@ -2507,32 +2508,22 @@ static bool amd64_nb_mce_bank_enabled_on_node(int nid) get_cpus_on_this_dct_cpumask(mask, nid); - msrs = kzalloc(sizeof(struct msr) * cpumask_weight(mask), GFP_KERNEL); - if (!msrs) { - amd64_printk(KERN_WARNING, "%s: error allocating msrs\n", - __func__); - free_cpumask_var(mask); - return false; - } - rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs); for_each_cpu(cpu, mask) { - nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE; + struct msr *reg = per_cpu_ptr(msrs, cpu); + nbe = reg->l & K8_MSR_MCGCTL_NBE; debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", - cpu, msrs[idx].q, + cpu, reg->q, (nbe ? "enabled" : "disabled")); if (!nbe) goto out; - - idx++; } ret = true; out: - kfree(msrs); free_cpumask_var(mask); return ret; } @@ -2540,8 +2531,7 @@ out: static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) { cpumask_var_t cmask; - struct msr *msrs = NULL; - int cpu, idx = 0; + int cpu; if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) { amd64_printk(KERN_WARNING, "%s: error allocating mask\n", @@ -2551,34 +2541,27 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) get_cpus_on_this_dct_cpumask(cmask, pvt->mc_node_id); - msrs = kzalloc(sizeof(struct msr) * cpumask_weight(cmask), GFP_KERNEL); - if (!msrs) { - amd64_printk(KERN_WARNING, "%s: error allocating msrs\n", - __func__); - return -ENOMEM; - } - rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs); for_each_cpu(cpu, cmask) { + struct msr *reg = per_cpu_ptr(msrs, cpu); + if (on) { - if (msrs[idx].l & K8_MSR_MCGCTL_NBE) + if (reg->l & K8_MSR_MCGCTL_NBE) pvt->flags.ecc_report = 1; - msrs[idx].l |= K8_MSR_MCGCTL_NBE; + reg->l |= K8_MSR_MCGCTL_NBE; } else { /* * Turn off ECC reporting only when it was off before */ if (!pvt->flags.ecc_report) - msrs[idx].l &= ~K8_MSR_MCGCTL_NBE; + reg->l &= ~K8_MSR_MCGCTL_NBE; } - idx++; } wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs); - kfree(msrs); free_cpumask_var(cmask); return 0; @@ -3036,6 +3019,8 @@ static int __init amd64_edac_init(void) if (cache_k8_northbridges() < 0) return err; + msrs = msrs_alloc(); + err = pci_register_driver(&amd64_pci_driver); if (err) return err; @@ -3071,6 +3056,9 @@ static void __exit amd64_edac_exit(void) edac_pci_release_generic_ctl(amd64_ctl_pci); pci_unregister_driver(&amd64_pci_driver); + + msrs_free(msrs); + msrs = NULL; } module_init(amd64_edac_init); -- cgit v1.2.3-70-g09d2 From 603adaf6b3e37450235f0ddb5986b961b3146a79 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 21 Dec 2009 14:52:53 +0100 Subject: amd64_edac: fix K8 chip select reporting Fix the case when amd64_debug_display_dimm_sizes() reports only half the amount of DRAM on it because it doesn't account for when the single DCT operates in 128-bit mode and merges chip selects from different DIMMs. Reported-by: Johannes Hirte LKML-Reference: <200912112202.48173.johannes.hirte@fem.tu-ilmenau.de> Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/edac/amd64_edac.c') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index df5b68433f3..784cc5a1ebc 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1700,11 +1700,14 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, */ static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt) { - int dimm, size0, size1; + int dimm, size0, size1, factor = 0; u32 dbam; u32 *dcsb; if (boot_cpu_data.x86 == 0xf) { + if (pvt->dclr0 & F10_WIDTH_128) + factor = 1; + /* K8 families < revF not supported yet */ if (pvt->ext_model < K8_REV_F) return; @@ -1732,7 +1735,8 @@ static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt) size1 = pvt->ops->dbam_to_cs(pvt, DBAM_DIMM(dimm, dbam)); edac_printk(KERN_DEBUG, EDAC_MC, " %d: %5dMB %d: %5dMB\n", - dimm * 2, size0, dimm * 2 + 1, size1); + dimm * 2, size0 << factor, + dimm * 2 + 1, size1 << factor); } } -- cgit v1.2.3-70-g09d2 From 8f68ed9728193b1f2fb53ba06031b06bd8b3d1b4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 21 Dec 2009 15:15:59 +0100 Subject: amd64_edac: fix driver instance freeing Fix use-after-free errors by pushing all memory-freeing calls to the end of amd64_remove_one_instance(). Reported-by: Darren Jenkins LKML-Reference: <1261370306.11354.52.camel@ICE-BOX> Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/edac/amd64_edac.c') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 784cc5a1ebc..fb0d36b4741 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2929,16 +2929,15 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) amd64_free_mc_sibling_devices(pvt); - kfree(pvt); - mci->pvt_info = NULL; - - mci_lookup[pvt->mc_node_id] = NULL; - /* unregister from EDAC MCE */ amd_report_gart_errors(false); amd_unregister_ecc_decoder(amd64_decode_bus_error); /* Free the EDAC CORE resources */ + mci->pvt_info = NULL; + mci_lookup[pvt->mc_node_id] = NULL; + + kfree(pvt); edac_mc_free(mci); } -- cgit v1.2.3-70-g09d2 From 56b34b91e22313294154cee0c16e294cf8a45b61 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 21 Dec 2009 18:13:01 +0100 Subject: amd64_edac: make driver loading more robust Currently, the module does not initialize fully when the DIMMs aren't ECC but remains still loaded. Propagate the error when no instance of the driver is properly initialized and prevent further loading. Reorganize and polish error handling in amd64_edac_init() while at it. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'drivers/edac/amd64_edac.c') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index fb0d36b4741..a8af27a7482 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3014,25 +3014,29 @@ static void amd64_setup_pci_device(void) static int __init amd64_edac_init(void) { int nb, err = -ENODEV; + bool load_ok = false; edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n"); opstate_init(); if (cache_k8_northbridges() < 0) - return err; + goto err_ret; msrs = msrs_alloc(); + if (!msrs) + goto err_ret; err = pci_register_driver(&amd64_pci_driver); if (err) - return err; + goto err_pci; /* * At this point, the array 'pvt_lookup[]' contains pointers to alloc'd * amd64_pvt structs. These will be used in the 2nd stage init function * to finish initialization of the MC instances. */ + err = -ENODEV; for (nb = 0; nb < num_k8_northbridges; nb++) { if (!pvt_lookup[nb]) continue; @@ -3040,16 +3044,21 @@ static int __init amd64_edac_init(void) err = amd64_init_2nd_stage(pvt_lookup[nb]); if (err) goto err_2nd_stage; - } - amd64_setup_pci_device(); + load_ok = true; + } - return 0; + if (load_ok) { + amd64_setup_pci_device(); + return 0; + } err_2nd_stage: - debugf0("2nd stage failed\n"); pci_unregister_driver(&amd64_pci_driver); - +err_pci: + msrs_free(msrs); + msrs = NULL; +err_ret: return err; } -- cgit v1.2.3-70-g09d2 From 43f5e68733cfe8bed3c30b5c14c4993dffb29766 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 21 Dec 2009 18:55:18 +0100 Subject: amd64_edac: fix forcing module load/unload Clear the override flag after force-loading the module. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/edac/amd64_edac.c') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index a8af27a7482..63c04d31cd7 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2690,9 +2690,8 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) amd64_printk(KERN_WARNING, "%s", ecc_warning); return -ENODEV; } - } else - /* CLEAR the override, since BIOS controlled it */ ecc_enable_override = 0; + } return 0; } -- cgit v1.2.3-70-g09d2 From 92389102b6832fc779f5c37f1d9e3eaadea6e059 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 21 Dec 2009 19:21:41 +0100 Subject: amd64_edac: restrict PCI config space access Do not access F2x19[0,4] on K8 since they're undefined there. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/edac/amd64_edac.c') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 63c04d31cd7..c5facd951dd 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2349,7 +2349,7 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0); - if (!dct_ganging_enabled(pvt)) { + if (!dct_ganging_enabled(pvt) && boot_cpu_data.x86 >= 0x10) { amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1); amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_1, &pvt->dchr1); } -- cgit v1.2.3-70-g09d2 From 926311fd7dabcd284a1e8a87a3e2bb5f929c0c60 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 11 Jan 2010 20:58:21 +0100 Subject: amd64_edac: Ensure index stays within bounds in amd64_get_scrub_rate Add a missing iterator variable thus fixing the conditional of the for-loop in amd64_get_scrub_rate(). Signed-off-by: Roel Kluin Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/edac/amd64_edac.c') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c5facd951dd..000dc67b85b 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -197,7 +197,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw) edac_printk(KERN_DEBUG, EDAC_MC, "pci-read, sdram scrub control value: %d \n", scrubval); - for (i = 0; ARRAY_SIZE(scrubrates); i++) { + for (i = 0; i < ARRAY_SIZE(scrubrates); i++) { if (scrubrates[i].scrubval == scrubval) { *bw = scrubrates[i].bandwidth; status = 0; -- cgit v1.2.3-70-g09d2