From a6e4d5a03e9e3587e88aba687d8f225f4f04c792 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Mar 2013 09:14:30 +0000 Subject: x86, efivars: firmware bug workarounds should be in platform code Let's not burden ia64 with checks in the common efivars code that we're not writing too much data to the variable store. That kind of thing is an x86 firmware bug, plain and simple. efi_query_variable_store() provides platforms with a wrapper in which they can perform checks and workarounds for EFI variable storage bugs. Cc: H. Peter Anvin Cc: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 5f2ecaf3f9d..c89c245eff4 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -999,3 +999,28 @@ u64 efi_mem_attributes(unsigned long phys_addr) } return 0; } + +/* + * Some firmware has serious problems when using more than 50% of the EFI + * variable store, i.e. it triggers bugs that can brick machines. Ensure that + * we never use more than this safe limit. + * + * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable + * store. + */ +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + if (status != EFI_SUCCESS) + return status; + + if (!storage_size || size > remaining_size || size > max_size || + (remaining_size - size) < (storage_size / 2)) + return EFI_OUT_OF_RESOURCES; + + return EFI_SUCCESS; +} -- cgit v1.2.3-70-g09d2 From 7791c8423f1f7f4dad94e753bae67461d5b80be8 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 10 Apr 2013 10:59:34 +0200 Subject: x86,efi: Check max_size only if it is non-zero. Some EFI implementations return always a MaximumVariableSize of 0, check against max_size only if it is non-zero. My Intel DQ67SW desktop board has such an implementation. Signed-off-by: Richard Weinberger Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index c89c245eff4..3f96a487aa2 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -1018,7 +1018,12 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) if (status != EFI_SUCCESS) return status; - if (!storage_size || size > remaining_size || size > max_size || + if (!max_size && remaining_size > size) + printk_once(KERN_ERR FW_BUG "Broken EFI implementation" + " is returning MaxVariableSize=0\n"); + + if (!storage_size || size > remaining_size || + (max_size && size > max_size) || (remaining_size - size) < (storage_size / 2)) return EFI_OUT_OF_RESOURCES; -- cgit v1.2.3-70-g09d2 From cc5a080c5d40c36089bb08a8a16fa3fc7047fe0f Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 15 Apr 2013 13:09:46 -0700 Subject: efi: Pass boot services variable info to runtime code EFI variables can be flagged as being accessible only within boot services. This makes it awkward for us to figure out how much space they use at runtime. In theory we could figure this out by simply comparing the results from QueryVariableInfo() to the space used by all of our variables, but that fails if the platform doesn't garbage collect on every boot. Thankfully, calling QueryVariableInfo() while still inside boot services gives a more reliable answer. This patch passes that information from the EFI boot stub up to the efi platform code. Signed-off-by: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 47 +++++++++++++++++++++++++++++++++++ arch/x86/include/asm/efi.h | 7 ++++++ arch/x86/include/uapi/asm/bootparam.h | 1 + arch/x86/platform/efi/efi.c | 21 ++++++++++++++++ 4 files changed, 76 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c205035a6b9..8615f758182 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -251,6 +251,51 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size) *size = len; } +static efi_status_t setup_efi_vars(struct boot_params *params) +{ + struct setup_data *data; + struct efi_var_bootdata *efidata; + u64 store_size, remaining_size, var_size; + efi_status_t status; + + if (!sys_table->runtime->query_variable_info) + return EFI_UNSUPPORTED; + + data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + + while (data && data->next) + data = (struct setup_data *)(unsigned long)data->next; + + status = efi_call_phys4(sys_table->runtime->query_variable_info, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, &store_size, + &remaining_size, &var_size); + + if (status != EFI_SUCCESS) + return status; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, sizeof(*efidata), &efidata); + + if (status != EFI_SUCCESS) + return status; + + efidata->data.type = SETUP_EFI_VARS; + efidata->data.len = sizeof(struct efi_var_bootdata) - + sizeof(struct setup_data); + efidata->data.next = 0; + efidata->store_size = store_size; + efidata->remaining_size = remaining_size; + efidata->max_var_size = var_size; + + if (data) + data->next = (unsigned long)efidata; + else + params->hdr.setup_data = (unsigned long)efidata; + +} + static efi_status_t setup_efi_pci(struct boot_params *params) { efi_pci_io_protocol *pci; @@ -1157,6 +1202,8 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table, setup_graphics(boot_params); + setup_efi_vars(boot_params); + setup_efi_pci(boot_params); status = efi_call_phys3(sys_table->boottime->allocate_pool, diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 60c89f30c72..2fb5d5884e2 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -102,6 +102,13 @@ extern void efi_call_phys_epilog(void); extern void efi_unmap_memmap(void); extern void efi_memory_uc(u64 addr, unsigned long size); +struct efi_var_bootdata { + struct setup_data data; + u64 store_size; + u64 remaining_size; + u64 max_var_size; +}; + #ifdef CONFIG_EFI static inline bool efi_is_native(void) diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index c15ddaf9071..08744242b8d 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -6,6 +6,7 @@ #define SETUP_E820_EXT 1 #define SETUP_DTB 2 #define SETUP_PCI 3 +#define SETUP_EFI_VARS 4 /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 3f96a487aa2..977d1ce7e17 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -69,6 +69,10 @@ struct efi_memory_map memmap; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static u64 efi_var_store_size; +static u64 efi_var_remaining_size; +static u64 efi_var_max_var_size; + unsigned long x86_efi_facility; /* @@ -682,6 +686,9 @@ void __init efi_init(void) char vendor[100] = "unknown"; int i = 0; void *tmp; + struct setup_data *data; + struct efi_var_bootdata *efi_var_data; + u64 pa_data; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -699,6 +706,20 @@ void __init efi_init(void) if (efi_systab_init(efi_phys.systab)) return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, sizeof(*efi_var_data)); + if (data->type == SETUP_EFI_VARS) { + efi_var_data = (struct efi_var_bootdata *)data; + + efi_var_store_size = efi_var_data->store_size; + efi_var_remaining_size = efi_var_data->remaining_size; + efi_var_max_var_size = efi_var_data->max_var_size; + } + pa_data = data->next; + early_iounmap(data, sizeof(*efi_var_data)); + } + set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); /* -- cgit v1.2.3-70-g09d2 From 31ff2f20d9003e74991d135f56e503fe776c127c Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 15 Apr 2013 13:09:47 -0700 Subject: efi: Distinguish between "remaining space" and actually used space EFI implementations distinguish between space that is actively used by a variable and space that merely hasn't been garbage collected yet. Space that hasn't yet been garbage collected isn't available for use and so isn't counted in the remaining_space field returned by QueryVariableInfo(). Combined with commit 68d9298 this can cause problems. Some implementations don't garbage collect until the remaining space is smaller than the maximum variable size, and as a result check_var_size() will always fail once more than 50% of the variable store has been used even if most of that space is marked as available for garbage collection. The user is unable to create new variables, and deleting variables doesn't increase the remaining space. The problem that 68d9298 was attempting to avoid was one where certain platforms fail if the actively used space is greater than 50% of the available storage space. We should be able to calculate that by simply summing the size of each available variable and subtracting that from the total storage space. With luck this will fix the problem described in https://bugzilla.kernel.org/show_bug.cgi?id=55471 without permitting damage to occur to the machines 68d9298 was attempting to fix. Signed-off-by: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 106 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 100 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 977d1ce7e17..4959e3f89d7 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,13 @@ #define EFI_DEBUG 1 +/* + * There's some additional metadata associated with each + * variable. Intel's reference implementation is 60 bytes - bump that + * to account for potential alignment constraints + */ +#define VAR_METADATA_SIZE 64 + struct efi __read_mostly efi = { .mps = EFI_INVALID_TABLE_ADDR, .acpi = EFI_INVALID_TABLE_ADDR, @@ -72,6 +80,9 @@ static efi_system_table_t efi_systab __initdata; static u64 efi_var_store_size; static u64 efi_var_remaining_size; static u64 efi_var_max_var_size; +static u64 boot_used_size; +static u64 boot_var_size; +static u64 active_size; unsigned long x86_efi_facility; @@ -166,8 +177,53 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { - return efi_call_virt3(get_next_variable, - name_size, name, vendor); + efi_status_t status; + static bool finished = false; + static u64 var_size; + + status = efi_call_virt3(get_next_variable, + name_size, name, vendor); + + if (status == EFI_NOT_FOUND) { + finished = true; + if (var_size < boot_used_size) { + boot_var_size = boot_used_size - var_size; + active_size += boot_var_size; + } else { + printk(KERN_WARNING FW_BUG "efi: Inconsistent initial sizes\n"); + } + } + + if (boot_used_size && !finished) { + unsigned long size; + u32 attr; + efi_status_t s; + void *tmp; + + s = virt_efi_get_variable(name, vendor, &attr, &size, NULL); + + if (s != EFI_BUFFER_TOO_SMALL || !size) + return status; + + tmp = kmalloc(size, GFP_ATOMIC); + + if (!tmp) + return status; + + s = virt_efi_get_variable(name, vendor, &attr, &size, tmp); + + if (s == EFI_SUCCESS && (attr & EFI_VARIABLE_NON_VOLATILE)) { + var_size += size; + var_size += ucs2_strsize(name, 1024); + active_size += size; + active_size += VAR_METADATA_SIZE; + active_size += ucs2_strsize(name, 1024); + } + + kfree(tmp); + } + + return status; } static efi_status_t virt_efi_set_variable(efi_char16_t *name, @@ -176,9 +232,34 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, unsigned long data_size, void *data) { - return efi_call_virt5(set_variable, - name, vendor, attr, - data_size, data); + efi_status_t status; + u32 orig_attr = 0; + unsigned long orig_size = 0; + + status = virt_efi_get_variable(name, vendor, &orig_attr, &orig_size, + NULL); + + if (status != EFI_BUFFER_TOO_SMALL) + orig_size = 0; + + status = efi_call_virt5(set_variable, + name, vendor, attr, + data_size, data); + + if (status == EFI_SUCCESS) { + if (orig_size) { + active_size -= orig_size; + active_size -= ucs2_strsize(name, 1024); + active_size -= VAR_METADATA_SIZE; + } + if (data_size) { + active_size += data_size; + active_size += ucs2_strsize(name, 1024); + active_size += VAR_METADATA_SIZE; + } + } + + return status; } static efi_status_t virt_efi_query_variable_info(u32 attr, @@ -720,6 +801,8 @@ void __init efi_init(void) early_iounmap(data, sizeof(*efi_var_data)); } + boot_used_size = efi_var_store_size - efi_var_remaining_size; + set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); /* @@ -1042,10 +1125,21 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) if (!max_size && remaining_size > size) printk_once(KERN_ERR FW_BUG "Broken EFI implementation" " is returning MaxVariableSize=0\n"); + /* + * Some firmware implementations refuse to boot if there's insufficient + * space in the variable store. We account for that by refusing the + * write if permitting it would reduce the available space to under + * 50%. However, some firmware won't reclaim variable space until + * after the used (not merely the actively used) space drops below + * a threshold. We can approximate that case with the value calculated + * above. If both the firmware and our calculations indicate that the + * available space would drop below 50%, refuse the write. + */ if (!storage_size || size > remaining_size || (max_size && size > max_size) || - (remaining_size - size) < (storage_size / 2)) + ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) && + (remaining_size - size < storage_size / 2))) return EFI_OUT_OF_RESOURCES; return EFI_SUCCESS; -- cgit v1.2.3-70-g09d2 From f1923820c447e986a9da0fc6bf60c1dccdf0408e Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 16 Apr 2013 13:51:43 +0200 Subject: perf/x86: Fix offcore_rsp valid mask for SNB/IVB The valid mask for both offcore_response_0 and offcore_response_1 was wrong for SNB/SNB-EP, IVB/IVB-EP. It was possible to write to reserved bit and cause a GP fault crashing the kernel. This patch fixes the problem by correctly marking the reserved bits in the valid mask for all the processors mentioned above. A distinction between desktop and server parts is introduced because bits 24-30 are only available on the server parts. This version of the patch is just a rebase to perf/urgent tree and should apply to older kernels as well. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: jolsa@redhat.com Cc: gregkh@linuxfoundation.org Cc: security@kernel.org Cc: ak@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index dab7580c47a..cc45deb791b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -153,8 +153,14 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = }; static struct extra_reg intel_snb_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), - INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), + EVENT_EXTRA_END +}; + +static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), EVENT_EXTRA_END }; @@ -2097,7 +2103,10 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - x86_pmu.extra_regs = intel_snb_extra_regs; + if (boot_cpu_data.x86_model == 45) + x86_pmu.extra_regs = intel_snbep_extra_regs; + else + x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_NO_HT_SHARING; @@ -2123,7 +2132,10 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_ivb_event_constraints; x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - x86_pmu.extra_regs = intel_snb_extra_regs; + if (boot_cpu_data.x86_model == 62) + x86_pmu.extra_regs = intel_snbep_extra_regs; + else + x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_NO_HT_SHARING; -- cgit v1.2.3-70-g09d2 From f6ce5002629e08eaa777ced3872a98f76845143e Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 16 Apr 2013 18:31:08 +0400 Subject: x86/Kconfig: Make EFI select UCS2_STRING The commit "efi: Distinguish between "remaining space" and actually used space" added usage of ucs2_*() functions to arch/x86/platform/efi/efi.c, but the only thing which selected UCS2_STRING was EFI_VARS, which is technically optional and can be built as a module. Signed-off-by: Sergey Vlasov Signed-off-by: Matt Fleming --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a4f24f5b121..01af8535f58 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1549,6 +1549,7 @@ config X86_SMAP config EFI bool "EFI runtime service support" depends on ACPI + select UCS2_STRING ---help--- This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). -- cgit v1.2.3-70-g09d2 From 3668011d4ad556224f7c012c1e870a6eaa0e59da Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 16 Apr 2013 18:31:09 +0400 Subject: efi: Export efi_query_variable_store() for efivars.ko Fixes build with CONFIG_EFI_VARS=m which was broken after the commit "x86, efivars: firmware bug workarounds should be in platform code". Signed-off-by: Sergey Vlasov Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 4959e3f89d7..4f364c7c611 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -1144,3 +1144,4 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) return EFI_SUCCESS; } +EXPORT_SYMBOL_GPL(efi_query_variable_store); -- cgit v1.2.3-70-g09d2 From 8c58bf3eec3b8fc8162fe557e9361891c20758f2 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 17 Apr 2013 01:00:53 +0200 Subject: x86,efi: Implement efi_no_storage_paranoia parameter Using this parameter one can disable the storage_size/2 check if he is really sure that the UEFI does sane gc and fulfills the spec. This parameter is useful if a devices uses more than 50% of the storage by default. The Intel DQSW67 desktop board is such a sucker for exmaple. Signed-off-by: Richard Weinberger Signed-off-by: Matt Fleming --- Documentation/kernel-parameters.txt | 6 ++++++ arch/x86/platform/efi/efi.c | 14 +++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc3..d1cc3a9fa14 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -788,6 +788,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. edd= [EDD] Format: {"off" | "on" | "skip[mbr]"} + efi_no_storage_paranoia [EFI; X86] + Using this parameter you can use more than 50% of + your efi variable storage. Use this parameter only if + you are really sure that your UEFI does sane gc and + fulfills the spec otherwise your board may brick. + eisa_irq_edge= [PARISC,HW] See header of drivers/parisc/eisa.c. diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 4f364c7c611..e4a86a677ce 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -113,6 +113,15 @@ static int __init setup_add_efi_memmap(char *arg) } early_param("add_efi_memmap", setup_add_efi_memmap); +static bool efi_no_storage_paranoia; + +static int __init setup_storage_paranoia(char *arg) +{ + efi_no_storage_paranoia = true; + return 0; +} +early_param("efi_no_storage_paranoia", setup_storage_paranoia); + static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { @@ -1137,7 +1146,10 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) */ if (!storage_size || size > remaining_size || - (max_size && size > max_size) || + (max_size && size > max_size)) + return EFI_OUT_OF_RESOURCES; + + if (!efi_no_storage_paranoia && ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) && (remaining_size - size < storage_size / 2))) return EFI_OUT_OF_RESOURCES; -- cgit v1.2.3-70-g09d2 From c729de8fcea37a1c444e81857eace12494c804a9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 15 Apr 2013 22:23:45 -0700 Subject: x86, kdump: Set crashkernel_low automatically Chao said that kdump does does work well on his system on 3.8 without extra parameter, even iommu does not work with kdump. And now have to append crashkernel_low=Y in first kernel to make kdump work. We have now modified crashkernel=X to allocate memory beyong 4G (if available) and do not allocate low range for crashkernel if the user does not specify that with crashkernel_low=Y. This causes regression if iommu is not enabled. Without iommu, swiotlb needs to be setup in first 4G and there is no low memory available to second kernel. Set crashkernel_low automatically if the user does not specify that. For system that does support IOMMU with kdump properly, user could specify crashkernel_low=0 to save that 72M low ram. -v3: add swiotlb_size() according to Konrad. -v4: add comments what 8M is for according to hpa. also update more crashkernel_low= in kernel-parameters.txt -v5: update changelog according to Vivek. -v6: Change description about swiotlb referring according to HATAYAMA. Reported-by: WANG Chao Tested-by: WANG Chao Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1366089828-19692-2-git-send-email-yinghai@kernel.org Acked-by: Vivek Goyal Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 14 +++++++++++--- arch/x86/kernel/setup.c | 21 ++++++++++++++++++--- include/linux/swiotlb.h | 1 + lib/swiotlb.c | 19 +++++++++++++++---- 4 files changed, 45 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc3..cff672da248 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -596,9 +596,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. is selected automatically. Check Documentation/kdump/kdump.txt for further details. - crashkernel_low=size[KMG] - [KNL, x86] parts under 4G. - crashkernel=range1:size1[,range2:size2,...][@offset] [KNL] Same as above, but depends on the memory in the running system. The syntax of range is @@ -606,6 +603,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a memory unit (amount[KMG]). See also Documentation/kdump/kdump.txt for an example. + crashkernel_low=size[KMG] + [KNL, x86_64] range under 4G. When crashkernel= is + passed, kernel allocate physical memory region + above 4G, that cause second kernel crash on system + that require some amount of low memory, e.g. swiotlb + requires at least 64M+32K low memory. Kernel would + try to allocate 72M below 4G automatically. + This one let user to specify own low range under 4G + for second kernel instead. + 0: to disable low allocation. + cs89x0_dma= [HW,NET] Format: diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 90d8cc930f5..12349202cae 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -521,19 +521,34 @@ static void __init reserve_crashkernel_low(void) unsigned long long low_base = 0, low_size = 0; unsigned long total_low_mem; unsigned long long base; + bool auto_set = false; int ret; total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); - if (ret != 0 || low_size <= 0) - return; + if (ret != 0) { + /* + * two parts from lib/swiotlb.c: + * swiotlb size: user specified with swiotlb= or default. + * swiotlb overflow buffer: now is hardcoded to 32k. + * We round it to 8M for other buffers that + * may need to stay low too. + */ + low_size = swiotlb_size_or_default() + (8UL<<20); + auto_set = true; + } else { + /* passed with crashkernel_low=0 ? */ + if (!low_size) + return; + } low_base = memblock_find_in_range(low_size, (1ULL<<32), low_size, alignment); if (!low_base) { - pr_info("crashkernel low reservation failed - No suitable area found.\n"); + if (!auto_set) + pr_info("crashkernel low reservation failed - No suitable area found.\n"); return; } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 2de42f9401d..a5ffd32642f 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -25,6 +25,7 @@ extern int swiotlb_force; extern void swiotlb_init(int verbose); int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); extern unsigned long swiotlb_nr_tbl(void); +unsigned long swiotlb_size_or_default(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); /* diff --git a/lib/swiotlb.c b/lib/swiotlb.c index bfe02b8fc55..d23762e6652 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -105,9 +105,9 @@ setup_io_tlb_npages(char *str) if (!strcmp(str, "force")) swiotlb_force = 1; - return 1; + return 0; } -__setup("swiotlb=", setup_io_tlb_npages); +early_param("swiotlb", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ unsigned long swiotlb_nr_tbl(void) @@ -115,6 +115,18 @@ unsigned long swiotlb_nr_tbl(void) return io_tlb_nslabs; } EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); + +/* default to 64MB */ +#define IO_TLB_DEFAULT_SIZE (64UL<<20) +unsigned long swiotlb_size_or_default(void) +{ + unsigned long size; + + size = io_tlb_nslabs << IO_TLB_SHIFT; + + return size ? size : (IO_TLB_DEFAULT_SIZE); +} + /* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) @@ -188,8 +200,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) void __init swiotlb_init(int verbose) { - /* default to 64MB */ - size_t default_size = 64UL<<20; + size_t default_size = IO_TLB_DEFAULT_SIZE; unsigned char *vstart; unsigned long bytes; -- cgit v1.2.3-70-g09d2 From 55a20ee7804ab64ac90bcdd4e2868a42829e2784 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 15 Apr 2013 22:23:47 -0700 Subject: x86, kdump: Retore crashkernel= to allocate under 896M Vivek found old kexec-tools does not work new kernel anymore. So change back crashkernel= back to old behavoir, and add crashkernel_high= to let user decide if buffer could be above 4G, and also new kexec-tools will be needed. -v2: let crashkernel=X override crashkernel_high= update description about _high will be ignored by crashkernel=X -v3: update description about kernel-parameters.txt according to Vivek. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1366089828-19692-4-git-send-email-yinghai@kernel.org Acked-by: Vivek Goyal Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 13 +++++++++++-- arch/x86/kernel/setup.c | 24 +++++++++++++++++++----- include/linux/kexec.h | 2 ++ kernel/kexec.c | 9 +++++++++ 4 files changed, 41 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cff672da248..709eb3edc6b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -603,9 +603,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a memory unit (amount[KMG]). See also Documentation/kdump/kdump.txt for an example. + crashkernel_high=size[KMG] + [KNL, x86_64] range could be above 4G. Allow kernel + to allocate physical memory region from top, so could + be above 4G if system have more than 4G ram installed. + Otherwise memory region will be allocated below 4G, if + available. + It will be ignored if crashkernel=X is specified. crashkernel_low=size[KMG] - [KNL, x86_64] range under 4G. When crashkernel= is - passed, kernel allocate physical memory region + [KNL, x86_64] range under 4G. When crashkernel_high= is + passed, kernel could allocate physical memory region above 4G, that cause second kernel crash on system that require some amount of low memory, e.g. swiotlb requires at least 64M+32K low memory. Kernel would @@ -613,6 +620,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. This one let user to specify own low range under 4G for second kernel instead. 0: to disable low allocation. + It will be ignored when crashkernel_high=X is not used + or memory reserved is below 4G. cs89x0_dma= [HW,NET] Format: diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 12349202cae..a85a144f205 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -507,11 +507,14 @@ static void __init memblock_x86_reserve_range_setup_data(void) /* * Keep the crash kernel below this limit. On 32 bits earlier kernels * would limit the kernel to the low 512 MiB due to mapping restrictions. + * On 64bit, old kexec-tools need to under 896MiB. */ #ifdef CONFIG_X86_32 -# define CRASH_KERNEL_ADDR_MAX (512 << 20) +# define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20) +# define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20) #else -# define CRASH_KERNEL_ADDR_MAX MAXMEM +# define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20) +# define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM #endif static void __init reserve_crashkernel_low(void) @@ -525,6 +528,7 @@ static void __init reserve_crashkernel_low(void) int ret; total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); + /* crashkernel_low=YM */ ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); if (ret != 0) { @@ -569,14 +573,22 @@ static void __init reserve_crashkernel(void) const unsigned long long alignment = 16<<20; /* 16M */ unsigned long long total_mem; unsigned long long crash_size, crash_base; + bool high = false; int ret; total_mem = memblock_phys_mem_size(); + /* crashkernel=XM */ ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); - if (ret != 0 || crash_size <= 0) - return; + if (ret != 0 || crash_size <= 0) { + /* crashkernel_high=XM */ + ret = parse_crashkernel_high(boot_command_line, total_mem, + &crash_size, &crash_base); + if (ret != 0 || crash_size <= 0) + return; + high = true; + } /* 0 means: find the address automatically */ if (crash_base <= 0) { @@ -584,7 +596,9 @@ static void __init reserve_crashkernel(void) * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX */ crash_base = memblock_find_in_range(alignment, - CRASH_KERNEL_ADDR_MAX, crash_size, alignment); + high ? CRASH_KERNEL_ADDR_HIGH_MAX : + CRASH_KERNEL_ADDR_LOW_MAX, + crash_size, alignment); if (!crash_base) { pr_info("crashkernel reservation failed - No suitable area found.\n"); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d2e6927bbaa..d78d28a733b 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -200,6 +200,8 @@ extern size_t vmcoreinfo_max_size; int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); +int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); int crash_shrink_memory(unsigned long new_size); diff --git a/kernel/kexec.c b/kernel/kexec.c index bddd3d7a74b..1b2f73f5f9b 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1422,6 +1422,15 @@ int __init parse_crashkernel(char *cmdline, "crashkernel="); } +int __init parse_crashkernel_high(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel_high="); +} + int __init parse_crashkernel_low(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, -- cgit v1.2.3-70-g09d2 From adbc742bf78695bb98c79d18c558b61571748b99 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 15 Apr 2013 22:23:48 -0700 Subject: x86, kdump: Change crashkernel_high/low= to crashkernel=,high/low Per hpa, use crashkernel=X,high crashkernel=Y,low instead of crashkernel_hign=X crashkernel_low=Y. As that could be extensible. -v2: according to Vivek, change delimiter to ; -v3: let hign and low only handle simple form and it conforms to description in kernel-parameters.txt still keep crashkernel=X override any crashkernel=X,high crashkernel=Y,low -v4: update get_last_crashkernel returning and add more strict checking in parse_crashkernel_simple() found by HATAYAMA. -v5: Change delimiter back to , according to HPA. also separate parse_suffix from parse_simper according to vivek. so we can avoid @pos in that path. -v6: Tight the checking about crashkernel=X,highblahblah,high found by HTYAYAMA. Cc: HATAYAMA Daisuke Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1366089828-19692-5-git-send-email-yinghai@kernel.org Acked-by: Vivek Goyal Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 10 ++-- arch/x86/kernel/setup.c | 6 +- kernel/kexec.c | 109 +++++++++++++++++++++++++++++++----- 3 files changed, 104 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 709eb3edc6b..a1ac1f1d6d3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -603,16 +603,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a memory unit (amount[KMG]). See also Documentation/kdump/kdump.txt for an example. - crashkernel_high=size[KMG] + crashkernel=size[KMG],high [KNL, x86_64] range could be above 4G. Allow kernel to allocate physical memory region from top, so could be above 4G if system have more than 4G ram installed. Otherwise memory region will be allocated below 4G, if available. It will be ignored if crashkernel=X is specified. - crashkernel_low=size[KMG] - [KNL, x86_64] range under 4G. When crashkernel_high= is - passed, kernel could allocate physical memory region + crashkernel=size[KMG],low + [KNL, x86_64] range under 4G. When crashkernel=X,high + is passed, kernel could allocate physical memory region above 4G, that cause second kernel crash on system that require some amount of low memory, e.g. swiotlb requires at least 64M+32K low memory. Kernel would @@ -620,7 +620,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. This one let user to specify own low range under 4G for second kernel instead. 0: to disable low allocation. - It will be ignored when crashkernel_high=X is not used + It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. cs89x0_dma= [HW,NET] diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a85a144f205..fae9134a2de 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -528,7 +528,7 @@ static void __init reserve_crashkernel_low(void) int ret; total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); - /* crashkernel_low=YM */ + /* crashkernel=Y,low */ ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); if (ret != 0) { @@ -542,7 +542,7 @@ static void __init reserve_crashkernel_low(void) low_size = swiotlb_size_or_default() + (8UL<<20); auto_set = true; } else { - /* passed with crashkernel_low=0 ? */ + /* passed with crashkernel=0,low ? */ if (!low_size) return; } @@ -582,7 +582,7 @@ static void __init reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); if (ret != 0 || crash_size <= 0) { - /* crashkernel_high=XM */ + /* crashkernel=X,high */ ret = parse_crashkernel_high(boot_command_line, total_mem, &crash_size, &crash_base); if (ret != 0 || crash_size <= 0) diff --git a/kernel/kexec.c b/kernel/kexec.c index 1b2f73f5f9b..401fdb041f3 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1368,35 +1368,114 @@ static int __init parse_crashkernel_simple(char *cmdline, return 0; } +#define SUFFIX_HIGH 0 +#define SUFFIX_LOW 1 +#define SUFFIX_NULL 2 +static __initdata char *suffix_tbl[] = { + [SUFFIX_HIGH] = ",high", + [SUFFIX_LOW] = ",low", + [SUFFIX_NULL] = NULL, +}; + /* - * That function is the entry point for command line parsing and should be - * called from the arch-specific code. + * That function parses "suffix" crashkernel command lines like + * + * crashkernel=size,[high|low] + * + * It returns 0 on success and -EINVAL on failure. */ +static int __init parse_crashkernel_suffix(char *cmdline, + unsigned long long *crash_size, + unsigned long long *crash_base, + const char *suffix) +{ + char *cur = cmdline; + + *crash_size = memparse(cmdline, &cur); + if (cmdline == cur) { + pr_warn("crashkernel: memory value expected\n"); + return -EINVAL; + } + + /* check with suffix */ + if (strncmp(cur, suffix, strlen(suffix))) { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + cur += strlen(suffix); + if (*cur != ' ' && *cur != '\0') { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + + return 0; +} + +static __init char *get_last_crashkernel(char *cmdline, + const char *name, + const char *suffix) +{ + char *p = cmdline, *ck_cmdline = NULL; + + /* find crashkernel and use the last one if there are more */ + p = strstr(p, name); + while (p) { + char *end_p = strchr(p, ' '); + char *q; + + if (!end_p) + end_p = p + strlen(p); + + if (!suffix) { + int i; + + /* skip the one with any known suffix */ + for (i = 0; suffix_tbl[i]; i++) { + q = end_p - strlen(suffix_tbl[i]); + if (!strncmp(q, suffix_tbl[i], + strlen(suffix_tbl[i]))) + goto next; + } + ck_cmdline = p; + } else { + q = end_p - strlen(suffix); + if (!strncmp(q, suffix, strlen(suffix))) + ck_cmdline = p; + } +next: + p = strstr(p+1, name); + } + + if (!ck_cmdline) + return NULL; + + return ck_cmdline; +} + static int __init __parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, - const char *name) + const char *name, + const char *suffix) { - char *p = cmdline, *ck_cmdline = NULL; char *first_colon, *first_space; + char *ck_cmdline; BUG_ON(!crash_size || !crash_base); *crash_size = 0; *crash_base = 0; - /* find crashkernel and use the last one if there are more */ - p = strstr(p, name); - while (p) { - ck_cmdline = p; - p = strstr(p+1, name); - } + ck_cmdline = get_last_crashkernel(cmdline, name, suffix); if (!ck_cmdline) return -EINVAL; ck_cmdline += strlen(name); + if (suffix) + return parse_crashkernel_suffix(ck_cmdline, crash_size, + crash_base, suffix); /* * if the commandline contains a ':', then that's the extended * syntax -- if not, it must be the classic syntax @@ -1413,13 +1492,17 @@ static int __init __parse_crashkernel(char *cmdline, return 0; } +/* + * That function is the entry point for command line parsing and should be + * called from the arch-specific code. + */ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel="); + "crashkernel=", NULL); } int __init parse_crashkernel_high(char *cmdline, @@ -1428,7 +1511,7 @@ int __init parse_crashkernel_high(char *cmdline, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel_high="); + "crashkernel=", suffix_tbl[SUFFIX_HIGH]); } int __init parse_crashkernel_low(char *cmdline, @@ -1437,7 +1520,7 @@ int __init parse_crashkernel_low(char *cmdline, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel_low="); + "crashkernel=", suffix_tbl[SUFFIX_LOW]); } static void update_vmcoreinfo_note(void) -- cgit v1.2.3-70-g09d2 From 7eff7ded02d1b15ba8321664839b353fa6c0c1e4 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 18 Apr 2013 08:44:46 -0700 Subject: x86, hyperv: Handle Xen emulation of Hyper-V more gracefully Install the Hyper-V specific interrupt handler only when needed. This would permit us to get rid of the Xen check. Note that when the vmbus drivers invokes the call to register its handler, we are sure to be running on Hyper-V. Signed-off-by: K. Y. Srinivasan Link: http://lkml.kernel.org/r/1366299886-6399-1-git-send-email-kys@microsoft.com Acked-by: Michael S. Tsirkin Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mshyperv.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index a7d26d83fb7..8f4be53ea04 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -35,13 +35,6 @@ static bool __init ms_hyperv_platform(void) if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) return false; - /* - * Xen emulates Hyper-V to support enlightened Windows. - * Check to see first if we are on a Xen Hypervisor. - */ - if (xen_cpuid_base()) - return false; - cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); @@ -82,12 +75,6 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); -#if IS_ENABLED(CONFIG_HYPERV) - /* - * Setup the IDT for hypervisor callback. - */ - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); -#endif } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { @@ -103,6 +90,11 @@ static irq_handler_t vmbus_isr; void hv_register_vmbus_handler(int irq, irq_handler_t handler) { + /* + * Setup the IDT for hypervisor callback. + */ + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); + vmbus_irq = irq; vmbus_isr = handler; } -- cgit v1.2.3-70-g09d2 From 74c3e3fcf350b2e7e3eaf9550528ee3f74e44b37 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 19 Apr 2013 16:36:03 -0700 Subject: x86, microcode: Verify the family before dispatching microcode patching For each CPU vendor that implements CPU microcode patching, there will be a minimum family for which this is implemented. Verify this minimum level of support. This can be done in the dispatch function or early in the application functions. Doing the latter turned out to be somewhat awkward because of the ineviable split between the BSP and the AP paths, and rather than pushing deep into the application functions, do this in the dispatch function. Reported-by: "Bryan O'Donoghue" Suggested-by: Borislav Petkov Cc: Fenghua Yu Link: http://lkml.kernel.org/r/1366392183-4149-1-git-send-email-bryan.odonoghue.lkml@nexus-software.ie --- arch/x86/kernel/microcode_core_early.c | 38 +++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c index 577db8417d1..833d51d6ee0 100644 --- a/arch/x86/kernel/microcode_core_early.c +++ b/arch/x86/kernel/microcode_core_early.c @@ -45,9 +45,6 @@ static int __cpuinit x86_vendor(void) u32 eax = 0x00000000; u32 ebx, ecx = 0, edx; - if (!have_cpuid_p()) - return X86_VENDOR_UNKNOWN; - native_cpuid(&eax, &ebx, &ecx, &edx); if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) @@ -59,18 +56,45 @@ static int __cpuinit x86_vendor(void) return X86_VENDOR_UNKNOWN; } +static int __cpuinit x86_family(void) +{ + u32 eax = 0x00000001; + u32 ebx, ecx = 0, edx; + int x86; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + x86 = (eax >> 8) & 0xf; + if (x86 == 15) + x86 += (eax >> 20) & 0xff; + + return x86; +} + void __init load_ucode_bsp(void) { - int vendor = x86_vendor(); + int vendor, x86; + + if (!have_cpuid_p()) + return; - if (vendor == X86_VENDOR_INTEL) + vendor = x86_vendor(); + x86 = x86_family(); + + if (vendor == X86_VENDOR_INTEL && x86 >= 6) load_ucode_intel_bsp(); } void __cpuinit load_ucode_ap(void) { - int vendor = x86_vendor(); + int vendor, x86; + + if (!have_cpuid_p()) + return; + + vendor = x86_vendor(); + x86 = x86_family(); - if (vendor == X86_VENDOR_INTEL) + if (vendor == X86_VENDOR_INTEL && x86 >= 6) load_ucode_intel_ap(); } -- cgit v1.2.3-70-g09d2