From a654e5ee4f2213844d23361eda4955fe9efaf35f Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Dec 2011 11:25:41 +0800 Subject: ACPI, APEI, GHES: Add PCIe AER recovery support aer_recover_queue() is called when recoverable PCIe AER errors are notified by firmware to do the recovery work. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/ghes.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index b8e08cb67a1..511b971d114 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -45,6 +45,8 @@ #include #include #include +#include +#include #include #include #include @@ -476,6 +478,27 @@ static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) } #endif } +#ifdef CONFIG_ACPI_APEI_PCIEAER + else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, + CPER_SEC_PCIE)) { + struct cper_sec_pcie *pcie_err; + pcie_err = (struct cper_sec_pcie *)(gdata+1); + if (sev == GHES_SEV_RECOVERABLE && + sec_sev == GHES_SEV_RECOVERABLE && + pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && + pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { + unsigned int devfn; + int aer_severity; + devfn = PCI_DEVFN(pcie_err->device_id.device, + pcie_err->device_id.function); + aer_severity = cper_severity_to_aer(sev); + aer_recover_queue(pcie_err->device_id.segment, + pcie_err->device_id.bus, + devfn, aer_severity); + } + + } +#endif } } -- cgit v1.2.3-70-g09d2 From 46b91e379f7180b482b789fbe615946d91e3a07f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 8 Dec 2011 11:25:42 +0800 Subject: ACPI, APEI, Print resource errors in conventional format Use the normal %pR-like format for MMIO and I/O port ranges. Signed-off-by: Bjorn Helgaas Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/apei-base.c | 8 ++++---- drivers/acpi/apei/einj.c | 11 ++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 61540360d5c..3492896b96f 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -460,9 +460,9 @@ int apei_resources_request(struct apei_resources *resources, desc); if (!r) { pr_err(APEI_PFX - "Can not request iomem region <%016llx-%016llx> for GARs.\n", + "Can not request [mem %#010llx-%#010llx] for %s registers\n", (unsigned long long)res->start, - (unsigned long long)res->end); + (unsigned long long)res->end - 1, desc); res_bak = res; goto err_unmap_iomem; } @@ -472,9 +472,9 @@ int apei_resources_request(struct apei_resources *resources, r = request_region(res->start, res->end - res->start, desc); if (!r) { pr_err(APEI_PFX - "Can not request ioport region <%016llx-%016llx> for GARs.\n", + "Can not request [io %#06llx-%#06llx] for %s registers\n", (unsigned long long)res->start, - (unsigned long long)res->end); + (unsigned long long)res->end - 1, desc); res_bak = res; goto err_unmap_ioport; } diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 589b96c3870..d25390274d7 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -209,9 +209,10 @@ static int __einj_error_trigger(u64 trigger_paddr) "APEI EINJ Trigger Table"); if (!r) { pr_err(EINJ_PFX - "Can not request iomem region <%016llx-%016llx> for Trigger table.\n", + "Can not request [mem %#010llx-%#010llx] for Trigger table\n", (unsigned long long)trigger_paddr, - (unsigned long long)trigger_paddr+sizeof(*trigger_tab)); + (unsigned long long)trigger_paddr + + sizeof(*trigger_tab) - 1); goto out; } trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab)); @@ -232,9 +233,9 @@ static int __einj_error_trigger(u64 trigger_paddr) "APEI EINJ Trigger Table"); if (!r) { pr_err(EINJ_PFX -"Can not request iomem region <%016llx-%016llx> for Trigger Table Entry.\n", - (unsigned long long)trigger_paddr+sizeof(*trigger_tab), - (unsigned long long)trigger_paddr + table_size); +"Can not request [mem %#010llx-%#010llx] for Trigger Table Entry\n", + (unsigned long long)trigger_paddr + sizeof(*trigger_tab), + (unsigned long long)trigger_paddr + table_size - 1); goto out_rel_header; } iounmap(trigger_tab); -- cgit v1.2.3-70-g09d2 From ad6861547b52ad7c31eacc336b79ac91d7fded75 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Dec 2011 11:25:43 +0800 Subject: ACPI, APEI, Remove table not found message Because APEI tables are optional, these message may confuse users, for example, https://bugs.launchpad.net/ubuntu/+source/linux/+bug/599715 Reported-by: Bjorn Helgaas Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/einj.c | 5 ++--- drivers/acpi/apei/erst.c | 5 ++--- drivers/acpi/apei/hest.c | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index d25390274d7..43eeb2e6e63 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -466,10 +466,9 @@ static int __init einj_init(void) status = acpi_get_table(ACPI_SIG_EINJ, 0, (struct acpi_table_header **)&einj_tab); - if (status == AE_NOT_FOUND) { - pr_info(EINJ_PFX "Table is not found!\n"); + if (status == AE_NOT_FOUND) return -ENODEV; - } else if (ACPI_FAILURE(status)) { + else if (ACPI_FAILURE(status)) { const char *msg = acpi_format_exception(status); pr_err(EINJ_PFX "Failed to get table, %s\n", msg); return -EINVAL; diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 631b9477b99..8e8d786c5d2 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -1125,10 +1125,9 @@ static int __init erst_init(void) status = acpi_get_table(ACPI_SIG_ERST, 0, (struct acpi_table_header **)&erst_tab); - if (status == AE_NOT_FOUND) { - pr_info(ERST_PFX "Table is not found!\n"); + if (status == AE_NOT_FOUND) goto err; - } else if (ACPI_FAILURE(status)) { + else if (ACPI_FAILURE(status)) { const char *msg = acpi_format_exception(status); pr_err(ERST_PFX "Failed to get table, %s\n", msg); rc = -EINVAL; diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 05fee06f4d6..f709269d493 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -221,10 +221,9 @@ void __init acpi_hest_init(void) status = acpi_get_table(ACPI_SIG_HEST, 0, (struct acpi_table_header **)&hest_tab); - if (status == AE_NOT_FOUND) { - pr_info(HEST_PFX "Table not found.\n"); + if (status == AE_NOT_FOUND) goto err; - } else if (ACPI_FAILURE(status)) { + else if (ACPI_FAILURE(status)) { const char *msg = acpi_format_exception(status); pr_err(HEST_PFX "Failed to get table, %s\n", msg); rc = -EINVAL; -- cgit v1.2.3-70-g09d2 From 5ba82ab534a325d310fe02af1c149f1072792c7b Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Dec 2011 11:25:44 +0800 Subject: ACPI, APEI, GHES, Distinguish interleaved error report in kernel log In most cases, printk only guarantees messages from different printk calling will not be interleaved between each other. But, one APEI GHES hardware error report will involve multiple printk calling, normally each for one line. So it is possible that the hardware error report comes from different generic hardware error source will be interleaved. In this patch, a sequence number is prefixed to each line of error report. So that, even if they are interleaved, they still can be distinguished by the prefixed sequence number. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/ghes.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 511b971d114..9dcb2d86aea 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -506,16 +506,22 @@ static void __ghes_print_estatus(const char *pfx, const struct acpi_hest_generic *generic, const struct acpi_hest_generic_status *estatus) { + static atomic_t seqno; + unsigned int curr_seqno; + char pfx_seq[64]; + if (pfx == NULL) { if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) - pfx = KERN_WARNING HW_ERR; + pfx = KERN_WARNING; else - pfx = KERN_ERR HW_ERR; + pfx = KERN_ERR; } + curr_seqno = atomic_inc_return(&seqno); + snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno); printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", - pfx, generic->header.source_id); - apei_estatus_print(pfx, estatus); + pfx_seq, generic->header.source_id); + apei_estatus_print(pfx_seq, estatus); } static int ghes_print_estatus(const char *pfx, @@ -798,7 +804,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) if (sev_global >= GHES_SEV_PANIC) { oops_begin(); - __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic, + __ghes_print_estatus(KERN_EMERG, ghes_global->generic, ghes_global->estatus); /* reboot to log the error! */ if (panic_timeout == 0) -- cgit v1.2.3-70-g09d2 From 46d12f0bcb17b2de89a059114349d472b7eb1783 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Dec 2011 11:25:45 +0800 Subject: ACPI, APEI, Printk queued error record before panic Because printk is not safe inside NMI handler, the recoverable error records received in NMI handler will be queued to be printked in a delayed IRQ context via irq_work. If a fatal error occurs after the recoverable error and before the irq_work processed, we lost a error report. To solve the issue, the queued error records are printked in NMI handler if system will go panic. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/ghes.c | 53 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 9 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 9dcb2d86aea..aaf36090de1 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -740,26 +740,34 @@ static int ghes_notify_sci(struct notifier_block *this, return ret; } +static struct llist_node *llist_nodes_reverse(struct llist_node *llnode) +{ + struct llist_node *next, *tail = NULL; + + while (llnode) { + next = llnode->next; + llnode->next = tail; + tail = llnode; + llnode = next; + } + + return tail; +} + static void ghes_proc_in_irq(struct irq_work *irq_work) { - struct llist_node *llnode, *next, *tail = NULL; + struct llist_node *llnode, *next; struct ghes_estatus_node *estatus_node; struct acpi_hest_generic *generic; struct acpi_hest_generic_status *estatus; u32 len, node_len; + llnode = llist_del_all(&ghes_estatus_llist); /* * Because the time order of estatus in list is reversed, * revert it back to proper order. */ - llnode = llist_del_all(&ghes_estatus_llist); - while (llnode) { - next = llnode->next; - llnode->next = tail; - tail = llnode; - llnode = next; - } - llnode = tail; + llnode = llist_nodes_reverse(llnode); while (llnode) { next = llnode->next; estatus_node = llist_entry(llnode, struct ghes_estatus_node, @@ -779,6 +787,32 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) } } +static void ghes_print_queued_estatus(void) +{ + struct llist_node *llnode; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic *generic; + struct acpi_hest_generic_status *estatus; + u32 len, node_len; + + llnode = llist_del_all(&ghes_estatus_llist); + /* + * Because the time order of estatus in list is reversed, + * revert it back to proper order. + */ + llnode = llist_nodes_reverse(llnode); + while (llnode) { + estatus_node = llist_entry(llnode, struct ghes_estatus_node, + llnode); + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + len = apei_estatus_len(estatus); + node_len = GHES_ESTATUS_NODE_LEN(len); + generic = estatus_node->generic; + ghes_print_estatus(NULL, generic, estatus); + llnode = llnode->next; + } +} + static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { struct ghes *ghes, *ghes_global = NULL; @@ -804,6 +838,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) if (sev_global >= GHES_SEV_PANIC) { oops_begin(); + ghes_print_queued_estatus(); __ghes_print_estatus(KERN_EMERG, ghes_global->generic, ghes_global->estatus); /* reboot to log the error! */ -- cgit v1.2.3-70-g09d2 From fdea163d8c17ba08814142259a467ba3e899010d Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Dec 2011 11:25:47 +0800 Subject: ACPI, APEI, EINJ, Fix resource conflict on some machine Some APEI firmware implementation will access injected address specified in param1 to trigger the error when injecting memory error. This will cause resource conflict with RAM. On one of our testing machine, if injecting at memory address 0x10000000, the following error will be reported in dmesg: APEI: Can not request iomem region <0000000010000000-0000000010000008> for GARs. This patch removes the injecting memory address range from trigger table resources to avoid conflict. Signed-off-by: Huang Ying Tested-by: Tony Luck Signed-off-by: Len Brown --- drivers/acpi/apei/apei-base.c | 11 +++++++++++ drivers/acpi/apei/apei-internal.h | 3 +++ drivers/acpi/apei/einj.c | 24 ++++++++++++++++++++++-- 3 files changed, 36 insertions(+), 2 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 3492896b96f..f2c5062e2b3 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -421,6 +421,17 @@ static int apei_resources_merge(struct apei_resources *resources1, return 0; } +int apei_resources_add(struct apei_resources *resources, + unsigned long start, unsigned long size, + bool iomem) +{ + if (iomem) + return apei_res_add(&resources->iomem, start, size); + else + return apei_res_add(&resources->ioport, start, size); +} +EXPORT_SYMBOL_GPL(apei_resources_add); + /* * EINJ has two groups of GARs (EINJ table entry and trigger table * entry), so common resources are subtracted from the trigger table diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index f57050e7a5e..d778edd34fb 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -95,6 +95,9 @@ static inline void apei_resources_init(struct apei_resources *resources) } void apei_resources_fini(struct apei_resources *resources); +int apei_resources_add(struct apei_resources *resources, + unsigned long start, unsigned long size, + bool iomem); int apei_resources_sub(struct apei_resources *resources1, struct apei_resources *resources2); int apei_resources_request(struct apei_resources *resources, diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 43eeb2e6e63..4fdc8a3b4f6 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -195,7 +195,8 @@ static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab) } /* Execute instructions in trigger error action table */ -static int __einj_error_trigger(u64 trigger_paddr) +static int __einj_error_trigger(u64 trigger_paddr, u32 type, + u64 param1, u64 param2) { struct acpi_einj_trigger *trigger_tab = NULL; struct apei_exec_context trigger_ctx; @@ -256,6 +257,25 @@ static int __einj_error_trigger(u64 trigger_paddr) rc = apei_resources_sub(&trigger_resources, &einj_resources); if (rc) goto out_fini; + /* + * Some firmware will access target address specified in + * param1 to trigger the error when injecting memory error. + * This will cause resource conflict with regular memory. So + * remove it from trigger table resources. + */ + if (param_extension && (type & 0x0038) && param2) { + struct apei_resources addr_resources; + apei_resources_init(&addr_resources); + rc = apei_resources_add(&addr_resources, + param1 & param2, + ~param2 + 1, true); + if (rc) + goto out_fini; + rc = apei_resources_sub(&trigger_resources, &addr_resources); + apei_resources_fini(&addr_resources); + if (rc) + goto out_fini; + } rc = apei_resources_request(&trigger_resources, "APEI EINJ Trigger"); if (rc) goto out_fini; @@ -325,7 +345,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) if (rc) return rc; trigger_paddr = apei_exec_ctx_get_output(&ctx); - rc = __einj_error_trigger(trigger_paddr); + rc = __einj_error_trigger(trigger_paddr, type, param1, param2); if (rc) return rc; rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION); -- cgit v1.2.3-70-g09d2 From b4e008dc53a31cb4bf6a12d9dbaf1d5c6070a838 Mon Sep 17 00:00:00 2001 From: "Xiao, Hui" Date: Thu, 8 Dec 2011 11:25:48 +0800 Subject: ACPI, APEI, EINJ, Refine the fix of resource conflict Current fix for resource conflict is to remove the address region from trigger resource, which is highly relies on valid user input. This patch is trying to avoid such potential issues by fetching the exact address region from trigger action table entry. Signed-off-by: Xiao, Hui Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/einj.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 4fdc8a3b4f6..6e6512e68a2 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -194,6 +194,26 @@ static int einj_check_trigger_header(struct acpi_einj_trigger *trigger_tab) return 0; } +static struct acpi_generic_address *einj_get_trigger_parameter_region( + struct acpi_einj_trigger *trigger_tab, u64 param1, u64 param2) +{ + int i; + struct acpi_whea_header *entry; + + entry = (struct acpi_whea_header *) + ((char *)trigger_tab + sizeof(struct acpi_einj_trigger)); + for (i = 0; i < trigger_tab->entry_count; i++) { + if (entry->action == ACPI_EINJ_TRIGGER_ERROR && + entry->instruction == ACPI_EINJ_WRITE_REGISTER_VALUE && + entry->register_region.space_id == + ACPI_ADR_SPACE_SYSTEM_MEMORY && + (entry->register_region.address & param2) == (param1 & param2)) + return &entry->register_region; + entry++; + } + + return NULL; +} /* Execute instructions in trigger error action table */ static int __einj_error_trigger(u64 trigger_paddr, u32 type, u64 param1, u64 param2) @@ -205,6 +225,7 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type, struct resource *r; u32 table_size; int rc = -EIO; + struct acpi_generic_address *trigger_param_region = NULL; r = request_mem_region(trigger_paddr, sizeof(*trigger_tab), "APEI EINJ Trigger Table"); @@ -266,12 +287,17 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type, if (param_extension && (type & 0x0038) && param2) { struct apei_resources addr_resources; apei_resources_init(&addr_resources); - rc = apei_resources_add(&addr_resources, - param1 & param2, - ~param2 + 1, true); - if (rc) - goto out_fini; - rc = apei_resources_sub(&trigger_resources, &addr_resources); + trigger_param_region = einj_get_trigger_parameter_region( + trigger_tab, param1, param2); + if (trigger_param_region) { + rc = apei_resources_add(&addr_resources, + trigger_param_region->address, + trigger_param_region->bit_width/8, true); + if (rc) + goto out_fini; + rc = apei_resources_sub(&trigger_resources, + &addr_resources); + } apei_resources_fini(&addr_resources); if (rc) goto out_fini; -- cgit v1.2.3-70-g09d2 From 4134b8c8811f23aa8a281db50dcee64dda414736 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Dec 2011 11:25:50 +0800 Subject: ACPI, APEI, Resolve false conflict between ACPI NVS and APEI Some firmware will access memory in ACPI NVS region via APEI. That is, instructions in APEI ERST/EINJ table will read/write ACPI NVS region. The original resource conflict checking in APEI code will check memory/ioport accessed by APEI via general resource management mech. But ACPI NVS region is marked as busy already, so that the false resource conflict will prevent APEI ERST/EINJ to work. To fix this, this patch excludes ACPI NVS regions when APEI components request resources. So that they will not conflict with ACPI NVS regions. Reported-and-tested-by: Pavel Ivanov Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/apei-base.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index f2c5062e2b3..4abb6c74a93 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -449,8 +449,19 @@ int apei_resources_sub(struct apei_resources *resources1, } EXPORT_SYMBOL_GPL(apei_resources_sub); +static int apei_get_nvs_callback(__u64 start, __u64 size, void *data) +{ + struct apei_resources *resources = data; + return apei_res_add(&resources->iomem, start, size); +} + +static int apei_get_nvs_resources(struct apei_resources *resources) +{ + return acpi_nvs_for_each_region(apei_get_nvs_callback, resources); +} + /* - * IO memory/port rersource management mechanism is used to check + * IO memory/port resource management mechanism is used to check * whether memory/port area used by GARs conflicts with normal memory * or IO memory/port of devices. */ @@ -459,12 +470,26 @@ int apei_resources_request(struct apei_resources *resources, { struct apei_res *res, *res_bak = NULL; struct resource *r; + struct apei_resources nvs_resources; int rc; rc = apei_resources_sub(resources, &apei_resources_all); if (rc) return rc; + /* + * Some firmware uses ACPI NVS region, that has been marked as + * busy, so exclude it from APEI resources to avoid false + * conflict. + */ + apei_resources_init(&nvs_resources); + rc = apei_get_nvs_resources(&nvs_resources); + if (rc) + goto res_fini; + rc = apei_resources_sub(resources, &nvs_resources); + if (rc) + goto res_fini; + rc = -EINVAL; list_for_each_entry(res, &resources->iomem, list) { r = request_mem_region(res->start, res->end - res->start, @@ -511,6 +536,8 @@ err_unmap_iomem: break; release_mem_region(res->start, res->end - res->start); } +res_fini: + apei_resources_fini(&nvs_resources); return rc; } EXPORT_SYMBOL_GPL(apei_resources_request); -- cgit v1.2.3-70-g09d2 From 700130b41f4ee54520ac2ef2f7f1d072789711a4 Mon Sep 17 00:00:00 2001 From: Myron Stowe Date: Mon, 7 Nov 2011 16:23:41 -0700 Subject: ACPI APEI: Convert atomicio routines APEI needs memory access in interrupt context. The obvious choice is acpi_read(), but originally it couldn't be used in interrupt context because it makes temporary mappings with ioremap(). Therefore, we added drivers/acpi/atomicio.c, which provides: acpi_pre_map_gar() -- ioremap in process context acpi_atomic_read() -- memory access in interrupt context acpi_post_unmap_gar() -- iounmap Later we added acpi_os_map_generic_address() (2971852) and enhanced acpi_read() so it works in interrupt context as long as the address has been previously mapped (620242a). Now this sequence: acpi_os_map_generic_address() -- ioremap in process context acpi_read()/apei_read() -- now OK in interrupt context acpi_os_unmap_generic_address() is equivalent to what atomicio.c provides. This patch introduces apei_read() and apei_write(), which currently are functional equivalents of acpi_read() and acpi_write(). This is mainly proactive, to prevent APEI breakages if acpi_read() and acpi_write() are ever augmented to support the 'bit_offset' field of GAS, as APEI's __apei_exec_write_register() precludes splitting up functionality related to 'bit_offset' and APEI's 'mask' (see its APEI_EXEC_PRESERVE_REGISTER block). With apei_read() and apei_write() in place, usages of atomicio routines are converted to apei_read()/apei_write() and existing calls within osl.c and the CA, based on the re-factoring that was done in an earlier patch series - http://marc.info/?l=linux-acpi&m=128769263327206&w=2: acpi_pre_map_gar() --> acpi_os_map_generic_address() acpi_post_unmap_gar() --> acpi_os_unmap_generic_address() acpi_atomic_read() --> apei_read() acpi_atomic_write() --> apei_write() Note that acpi_read() and acpi_write() currently use 'bit_width' for accessing GARs which seems incorrect. 'bit_width' is the size of the register, while 'access_width' is the size of the access the processor must generate on the bus. The 'access_width' may be larger, for example, if the hardware only supports 32-bit or 64-bit reads. I wanted to minimize any possible impacts with this patch series so I did *not* change this behavior. Signed-off-by: Myron Stowe Signed-off-by: Len Brown --- drivers/acpi/apei/apei-base.c | 102 +++++++++++++++++++++++++++++++++++--- drivers/acpi/apei/apei-internal.h | 3 ++ drivers/acpi/apei/ghes.c | 10 ++-- 3 files changed, 104 insertions(+), 11 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 4abb6c74a93..e45350cb6ac 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -34,13 +34,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include "apei-internal.h" @@ -70,7 +70,7 @@ int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val) { int rc; - rc = acpi_atomic_read(val, &entry->register_region); + rc = apei_read(val, &entry->register_region); if (rc) return rc; *val >>= entry->register_region.bit_offset; @@ -116,13 +116,13 @@ int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val) val <<= entry->register_region.bit_offset; if (entry->flags & APEI_EXEC_PRESERVE_REGISTER) { u64 valr = 0; - rc = acpi_atomic_read(&valr, &entry->register_region); + rc = apei_read(&valr, &entry->register_region); if (rc) return rc; valr &= ~(entry->mask << entry->register_region.bit_offset); val |= valr; } - rc = acpi_atomic_write(val, &entry->register_region); + rc = apei_write(val, &entry->register_region); return rc; } @@ -243,7 +243,7 @@ static int pre_map_gar_callback(struct apei_exec_context *ctx, u8 ins = entry->instruction; if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER) - return acpi_pre_map_gar(&entry->register_region); + return acpi_os_map_generic_address(&entry->register_region); return 0; } @@ -276,7 +276,7 @@ static int post_unmap_gar_callback(struct apei_exec_context *ctx, u8 ins = entry->instruction; if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER) - acpi_post_unmap_gar(&entry->register_region); + acpi_os_unmap_generic_address(&entry->register_region); return 0; } @@ -591,6 +591,96 @@ static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr) return 0; } +/* read GAR in interrupt (including NMI) or process context */ +int apei_read(u64 *val, struct acpi_generic_address *reg) +{ + int rc; + u64 address; + u32 tmp, width = reg->bit_width; + acpi_status status; + + rc = apei_check_gar(reg, &address); + if (rc) + return rc; + + if (width == 64) + width = 32; /* Break into two 32-bit transfers */ + + *val = 0; + switch(reg->space_id) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + status = acpi_os_read_memory((acpi_physical_address) + address, &tmp, width); + if (ACPI_FAILURE(status)) + return -EIO; + *val = tmp; + + if (reg->bit_width == 64) { + /* Read the top 32 bits */ + status = acpi_os_read_memory((acpi_physical_address) + (address + 4), &tmp, 32); + if (ACPI_FAILURE(status)) + return -EIO; + *val |= ((u64)tmp << 32); + } + break; + case ACPI_ADR_SPACE_SYSTEM_IO: + status = acpi_os_read_port(address, (u32 *)val, reg->bit_width); + if (ACPI_FAILURE(status)) + return -EIO; + break; + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(apei_read); + +/* write GAR in interrupt (including NMI) or process context */ +int apei_write(u64 val, struct acpi_generic_address *reg) +{ + int rc; + u64 address; + u32 width = reg->bit_width; + acpi_status status; + + rc = apei_check_gar(reg, &address); + if (rc) + return rc; + + if (width == 64) + width = 32; /* Break into two 32-bit transfers */ + + switch (reg->space_id) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + status = acpi_os_write_memory((acpi_physical_address) + address, ACPI_LODWORD(val), + width); + if (ACPI_FAILURE(status)) + return -EIO; + + if (reg->bit_width == 64) { + status = acpi_os_write_memory((acpi_physical_address) + (address + 4), + ACPI_HIDWORD(val), 32); + if (ACPI_FAILURE(status)) + return -EIO; + } + break; + case ACPI_ADR_SPACE_SYSTEM_IO: + status = acpi_os_write_port(address, val, reg->bit_width); + if (ACPI_FAILURE(status)) + return -EIO; + break; + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(apei_write); + static int collect_res_callback(struct apei_exec_context *ctx, struct acpi_whea_header *entry, void *data) diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index d778edd34fb..cca240a3303 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -68,6 +68,9 @@ static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 actio /* IP has been set in instruction function */ #define APEI_EXEC_SET_IP 1 +int apei_read(u64 *val, struct acpi_generic_address *reg); +int apei_write(u64 val, struct acpi_generic_address *reg); + int __apei_exec_read_register(struct acpi_whea_header *entry, u64 *val); int __apei_exec_write_register(struct acpi_whea_header *entry, u64 val); int apei_exec_read_register(struct apei_exec_context *ctx, diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index aaf36090de1..b3207e16670 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -48,7 +49,6 @@ #include #include #include -#include #include #include #include @@ -301,7 +301,7 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) if (!ghes) return ERR_PTR(-ENOMEM); ghes->generic = generic; - rc = acpi_pre_map_gar(&generic->error_status_address); + rc = acpi_os_map_generic_address(&generic->error_status_address); if (rc) goto err_free; error_block_length = generic->error_block_length; @@ -321,7 +321,7 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) return ghes; err_unmap: - acpi_post_unmap_gar(&generic->error_status_address); + acpi_os_unmap_generic_address(&generic->error_status_address); err_free: kfree(ghes); return ERR_PTR(rc); @@ -330,7 +330,7 @@ err_free: static void ghes_fini(struct ghes *ghes) { kfree(ghes->estatus); - acpi_post_unmap_gar(&ghes->generic->error_status_address); + acpi_os_unmap_generic_address(&ghes->generic->error_status_address); } enum { @@ -401,7 +401,7 @@ static int ghes_read_estatus(struct ghes *ghes, int silent) u32 len; int rc; - rc = acpi_atomic_read(&buf_paddr, &g->error_status_address); + rc = apei_read(&buf_paddr, &g->error_status_address); if (rc) { if (!silent && printk_ratelimit()) pr_warning(FW_WARN GHES_PFX -- cgit v1.2.3-70-g09d2 From c130bd6f82e5dda28b1a19741c4c2fe269713199 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 17 Jan 2012 12:10:16 -0800 Subject: acpi/apei/einj: Add extensions to EINJ from rev 5.0 of acpi spec ACPI 5.0 provides extensions to the EINJ mechanism to specify the target for the error injection - by APICID for cpu related errors, by address for memory related errors, and by segment/bus/device/function for PCIe related errors. Also extensions for vendor specific error injections. Tested-by: Chen Gong Signed-off-by: Tony Luck Signed-off-by: Len Brown --- Documentation/acpi/apei/einj.txt | 55 ++++++++-- drivers/acpi/apei/einj.c | 224 ++++++++++++++++++++++++++++++++------- include/acpi/actbl1.h | 3 +- 3 files changed, 234 insertions(+), 48 deletions(-) (limited to 'drivers/acpi/apei') diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt index 5cc699ba545..e7cc3639721 100644 --- a/Documentation/acpi/apei/einj.txt +++ b/Documentation/acpi/apei/einj.txt @@ -47,20 +47,53 @@ directory apei/einj. The following files are provided. - param1 This file is used to set the first error parameter value. Effect of - parameter depends on error_type specified. For memory error, this is - physical memory address. Only available if param_extension module - parameter is specified. + parameter depends on error_type specified. - param2 This file is used to set the second error parameter value. Effect of - parameter depends on error_type specified. For memory error, this is - physical memory address mask. Only available if param_extension - module parameter is specified. + parameter depends on error_type specified. + +BIOS versions based in the ACPI 4.0 specification have limited options +to control where the errors are injected. Your BIOS may support an +extension (enabled with the param_extension=1 module parameter, or +boot command line einj.param_extension=1). This allows the address +and mask for memory injections to be specified by the param1 and +param2 files in apei/einj. + +BIOS versions using the ACPI 5.0 specification have more control over +the target of the injection. For processor related errors (type 0x1, +0x2 and 0x4) the APICID of the target should be provided using the +param1 file in apei/einj. For memory errors (type 0x8, 0x10 and 0x20) +the address is set using param1 with a mask in param2 (0x0 is equivalent +to all ones). For PCI express errors (type 0x40, 0x80 and 0x100) the +segment, bus, device and function are specified using param1: + + 31 24 23 16 15 11 10 8 7 0 + +-------------------------------------------------+ + | segment | bus | device | function | reserved | + +-------------------------------------------------+ + +An ACPI 5.0 BIOS may also allow vendor specific errors to be injected. +In this case a file named vendor will contain identifying information +from the BIOS that hopefully will allow an application wishing to use +the vendor specific extension to tell that they are running on a BIOS +that supports it. All vendor extensions have the 0x80000000 bit set in +error_type. A file vendor_flags controls the interpretation of param1 +and param2 (1 = PROCESSOR, 2 = MEMORY, 4 = PCI). See your BIOS vendor +documentation for details (and expect changes to this API if vendors +creativity in using this feature expands beyond our expectations). + +Example: +# cd /sys/kernel/debug/apei/einj +# cat available_error_type # See which errors can be injected +0x00000002 Processor Uncorrectable non-fatal +0x00000008 Memory Correctable +0x00000010 Memory Uncorrectable non-fatal +# echo 0x12345000 > param1 # Set memory address for injection +# echo 0xfffffffffffff000 > param2 # Mask - anywhere in this page +# echo 0x8 > error_type # Choose correctable memory error +# echo 1 > error_inject # Inject now -Injecting parameter support is a BIOS version specific extension, that -is, it only works on some BIOS version. If you want to use it, please -make sure your BIOS version has the proper support and specify -"param_extension=y" in module parameter. For more information about EINJ, please refer to ACPI specification -version 4.0, section 17.5. +version 4.0, section 17.5 and ACPI 5.0, section 18.6. diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 589b96c3870..31546fd2102 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -42,6 +42,42 @@ /* Firmware should respond within 1 milliseconds */ #define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC) +/* + * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. + */ +static int acpi5; + +struct set_error_type_with_address { + u32 type; + u32 vendor_extension; + u32 flags; + u32 apicid; + u64 memory_address; + u64 memory_address_range; + u32 pcie_sbdf; +}; +enum { + SETWA_FLAGS_APICID = 1, + SETWA_FLAGS_MEM = 2, + SETWA_FLAGS_PCIE_SBDF = 4, +}; + +/* + * Vendor extensions for platform specific operations + */ +struct vendor_error_type_extension { + u32 length; + u32 pcie_sbdf; + u16 vendor_id; + u16 device_id; + u8 rev_id; + u8 reserved[3]; +}; + +static u32 vendor_flags; +static struct debugfs_blob_wrapper vendor_blob; +static char vendor_dev[64]; + /* * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the * EINJ table through an unpublished extension. Use with caution as @@ -103,7 +139,14 @@ static struct apei_exec_ins_type einj_ins_type[] = { */ static DEFINE_MUTEX(einj_mutex); -static struct einj_parameter *einj_param; +static void *einj_param; + +#ifndef readq +static inline __u64 readq(volatile void __iomem *addr) +{ + return ((__u64)readl(addr+4) << 32) + readl(addr); +} +#endif #ifndef writeq static inline void writeq(__u64 val, volatile void __iomem *addr) @@ -158,10 +201,31 @@ static int einj_timedout(u64 *t) return 0; } -static u64 einj_get_parameter_address(void) +static void check_vendor_extension(u64 paddr, + struct set_error_type_with_address *v5param) +{ + int offset = readl(&v5param->vendor_extension); + struct vendor_error_type_extension *v; + u32 sbdf; + + if (!offset) + return; + v = ioremap(paddr + offset, sizeof(*v)); + if (!v) + return; + sbdf = readl(&v->pcie_sbdf); + sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n", + sbdf >> 24, (sbdf >> 16) & 0xff, + (sbdf >> 11) & 0x1f, (sbdf >> 8) & 0x7, + readw(&v->vendor_id), readw(&v->device_id), + readb(&v->rev_id)); + iounmap(v); +} + +static void *einj_get_parameter_address(void) { int i; - u64 paddr = 0; + u64 paddrv4 = 0, paddrv5 = 0; struct acpi_whea_header *entry; entry = EINJ_TAB_ENTRY(einj_tab); @@ -170,12 +234,40 @@ static u64 einj_get_parameter_address(void) entry->instruction == ACPI_EINJ_WRITE_REGISTER && entry->register_region.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - memcpy(&paddr, &entry->register_region.address, - sizeof(paddr)); + memcpy(&paddrv4, &entry->register_region.address, + sizeof(paddrv4)); + if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS && + entry->instruction == ACPI_EINJ_WRITE_REGISTER && + entry->register_region.space_id == + ACPI_ADR_SPACE_SYSTEM_MEMORY) + memcpy(&paddrv5, &entry->register_region.address, + sizeof(paddrv5)); entry++; } + if (paddrv5) { + struct set_error_type_with_address *v5param; + + v5param = ioremap(paddrv5, sizeof(*v5param)); + if (v5param) { + acpi5 = 1; + check_vendor_extension(paddrv5, v5param); + return v5param; + } + } + if (paddrv4) { + struct einj_parameter *v4param; + + v4param = ioremap(paddrv4, sizeof(*v4param)); + if (!v4param) + return 0; + if (readq(&v4param->reserved1) || readq(&v4param->reserved2)) { + iounmap(v4param); + return 0; + } + return v4param; + } - return paddr; + return 0; } /* do sanity check to trigger table */ @@ -293,12 +385,56 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) if (rc) return rc; apei_exec_ctx_set_input(&ctx, type); - rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE); - if (rc) - return rc; - if (einj_param) { - writeq(param1, &einj_param->param1); - writeq(param2, &einj_param->param2); + if (acpi5) { + struct set_error_type_with_address *v5param = einj_param; + + writel(type, &v5param->type); + if (type & 0x80000000) { + switch (vendor_flags) { + case SETWA_FLAGS_APICID: + writel(param1, &v5param->apicid); + break; + case SETWA_FLAGS_MEM: + writeq(param1, &v5param->memory_address); + writeq(param2, &v5param->memory_address_range); + break; + case SETWA_FLAGS_PCIE_SBDF: + writel(param1, &v5param->pcie_sbdf); + break; + } + writel(vendor_flags, &v5param->flags); + } else { + switch (type) { + case ACPI_EINJ_PROCESSOR_CORRECTABLE: + case ACPI_EINJ_PROCESSOR_UNCORRECTABLE: + case ACPI_EINJ_PROCESSOR_FATAL: + writel(param1, &v5param->apicid); + writel(SETWA_FLAGS_APICID, &v5param->flags); + break; + case ACPI_EINJ_MEMORY_CORRECTABLE: + case ACPI_EINJ_MEMORY_UNCORRECTABLE: + case ACPI_EINJ_MEMORY_FATAL: + writeq(param1, &v5param->memory_address); + writeq(param2, &v5param->memory_address_range); + writel(SETWA_FLAGS_MEM, &v5param->flags); + break; + case ACPI_EINJ_PCIX_CORRECTABLE: + case ACPI_EINJ_PCIX_UNCORRECTABLE: + case ACPI_EINJ_PCIX_FATAL: + writel(param1, &v5param->pcie_sbdf); + writel(SETWA_FLAGS_PCIE_SBDF, &v5param->flags); + break; + } + } + } else { + rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE); + if (rc) + return rc; + if (einj_param) { + struct einj_parameter *v4param = einj_param; + writeq(param1, &v4param->param1); + writeq(param2, &v4param->param2); + } } rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION); if (rc) @@ -408,15 +544,25 @@ static int error_type_set(void *data, u64 val) { int rc; u32 available_error_type = 0; + u32 tval, vendor; + + /* + * Vendor defined types have 0x80000000 bit set, and + * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE + */ + vendor = val & 0x80000000; + tval = val & 0x7fffffff; /* Only one error type can be specified */ - if (val & (val - 1)) - return -EINVAL; - rc = einj_get_available_error_type(&available_error_type); - if (rc) - return rc; - if (!(val & available_error_type)) + if (tval & (tval - 1)) return -EINVAL; + if (!vendor) { + rc = einj_get_available_error_type(&available_error_type); + if (rc) + return rc; + if (!(val & available_error_type)) + return -EINVAL; + } error_type = val; return 0; @@ -455,7 +601,6 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) static int __init einj_init(void) { int rc; - u64 param_paddr; acpi_status status; struct dentry *fentry; struct apei_exec_context ctx; @@ -509,23 +654,30 @@ static int __init einj_init(void) rc = apei_exec_pre_map_gars(&ctx); if (rc) goto err_release; - if (param_extension) { - param_paddr = einj_get_parameter_address(); - if (param_paddr) { - einj_param = ioremap(param_paddr, sizeof(*einj_param)); - rc = -ENOMEM; - if (!einj_param) - goto err_unmap; - fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param1); - if (!fentry) - goto err_unmap; - fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param2); - if (!fentry) - goto err_unmap; - } else - pr_warn(EINJ_PFX "Parameter extension is not supported.\n"); + + einj_param = einj_get_parameter_address(); + if ((param_extension || acpi5) && einj_param) { + fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param1); + if (!fentry) + goto err_unmap; + fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param2); + if (!fentry) + goto err_unmap; + } + + if (vendor_dev[0]) { + vendor_blob.data = vendor_dev; + vendor_blob.size = strlen(vendor_dev); + fentry = debugfs_create_blob("vendor", S_IRUSR, + einj_debug_dir, &vendor_blob); + if (!fentry) + goto err_unmap; + fentry = debugfs_create_x32("vendor_flags", S_IRUSR | S_IWUSR, + einj_debug_dir, &vendor_flags); + if (!fentry) + goto err_unmap; } pr_info(EINJ_PFX "Error INJection is initialized.\n"); diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 7504bc99b29..f25d7efee63 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -228,7 +228,8 @@ enum acpi_einj_actions { ACPI_EINJ_EXECUTE_OPERATION = 5, ACPI_EINJ_CHECK_BUSY_STATUS = 6, ACPI_EINJ_GET_COMMAND_STATUS = 7, - ACPI_EINJ_ACTION_RESERVED = 8, /* 8 and greater are reserved */ + ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 8, + ACPI_EINJ_ACTION_RESERVED = 9, /* 9 and greater are reserved */ ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */ }; -- cgit v1.2.3-70-g09d2