diff options
author | H. Peter Anvin <hpa@linux.intel.com> | 2010-07-28 13:11:28 -0700 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2010-07-28 13:11:28 -0700 |
commit | 7d50d07da23995a18ac449636cb42aec2cb2808d (patch) | |
tree | 0ddf940a5e9ddc5eeebc996601d7fee3c36ab836 /arch/x86 | |
parent | 2decb194e65ab66eaf787512dc572cdc99893b24 (diff) | |
parent | 6aa033d7efb85830535bb83cf6713d6025ae6e59 (diff) |
Merge remote branch 'linus/master' into x86/cpu
Diffstat (limited to 'arch/x86')
39 files changed, 245 insertions, 90 deletions
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index ed7aeff786b..45bc9402aa4 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -41,13 +41,12 @@ static __videocard video_vga; static u8 vga_set_basic_mode(void) { struct biosregs ireg, oreg; - u16 ax; u8 mode; initregs(&ireg); /* Query current mode */ - ax = 0x0f00; + ireg.ax = 0x0f00; intcall(0x10, &ireg, &oreg); mode = oreg.al; diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 63cb4096c3d..9cb2edb87c2 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -183,7 +183,7 @@ struct mp_ioapic_gsi{ u32 gsi_end; }; extern struct mp_ioapic_gsi mp_gsi_routing[]; -extern u32 gsi_end; +extern u32 gsi_top; int mp_find_ioapic(u32 gsi); int mp_find_ioapic_pin(int ioapic, u32 gsi); void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); @@ -197,7 +197,7 @@ static const int timer_through_8259 = 0; static inline void ioapic_init_mappings(void) { } static inline void ioapic_insert_resources(void) { } static inline void probe_nr_irqs_gsi(void) { } -#define gsi_end (NR_IRQS_LEGACY - 1) +#define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } struct io_apic_irq_attr; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e57bc20683d..7cc4a026331 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -110,6 +110,7 @@ #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD64_DC_CFG 0xc0011022 #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0797e748d28..cd28f9ad910 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -77,6 +77,7 @@ do { \ if (0) { \ pto_T__ pto_tmp__; \ pto_tmp__ = (val); \ + (void)pto_tmp__; \ } \ switch (sizeof(var)) { \ case 1: \ @@ -115,6 +116,7 @@ do { \ if (0) { \ pao_T__ pao_tmp__; \ pao_tmp__ = (val); \ + (void)pao_tmp__; \ } \ switch (sizeof(var)) { \ case 1: \ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 5e67c153231..ed5903be26f 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -26,7 +26,7 @@ */ #define VMALLOC_OFFSET (8 * 1024 * 1024) -#ifndef __ASSEMBLER__ +#ifndef __ASSEMBLY__ extern bool __vmalloc_start_set; /* set once high_memory is set */ #endif diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 48dcfa62ea0..fd921c3a684 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -15,6 +15,8 @@ static inline int arch_prepare_suspend(void) { return 0; } struct saved_context { u16 es, fs, gs, ss; unsigned long cr0, cr2, cr3, cr4; + u64 misc_enable; + bool misc_enable_saved; struct desc_ptr gdt; struct desc_ptr idt; u16 ldt; diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 06284f42b75..8d942afae68 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -27,6 +27,8 @@ struct saved_context { u16 ds, es, fs, gs, ss; unsigned long gs_base, gs_kernel_base, fs_base; unsigned long cr0, cr2, cr3, cr4, cr8; + u64 misc_enable; + bool misc_enable_saved; unsigned long efer; u16 gdt_pad; u16 gdt_limit; diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index b8fe48ee2ed..e7f4d33c55e 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -451,7 +451,7 @@ void stop_this_cpu(void *dummy); * * (Could use an alternative three way for this if there was one.) */ -static inline void rdtsc_barrier(void) +static __always_inline void rdtsc_barrier(void) { alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 519b54327d7..baa579c8e03 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -142,6 +142,7 @@ struct x86_cpuinit_ops { * @set_wallclock: set time back to HW clock * @is_untracked_pat_range exclude from PAT logic * @nmi_init enable NMI on cpus + * @i8042_detect pre-detect if i8042 controller exists */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); @@ -150,6 +151,7 @@ struct x86_platform_ops { void (*iommu_shutdown)(void); bool (*is_untracked_pat_range)(u64 start, u64 end); void (*nmi_init)(void); + int (*i8042_detect)(void); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 60cc4058ed5..c05872aa3ce 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -118,7 +118,7 @@ static unsigned int gsi_to_irq(unsigned int gsi) if (gsi >= NR_IRQS_LEGACY) irq = gsi; else - irq = gsi_end + 1 + gsi; + irq = gsi_top + gsi; return irq; } @@ -129,10 +129,10 @@ static u32 irq_to_gsi(int irq) if (irq < NR_IRQS_LEGACY) gsi = isa_irq_to_gsi[irq]; - else if (irq <= gsi_end) + else if (irq < gsi_top) gsi = irq; - else if (irq <= (gsi_end + NR_IRQS_LEGACY)) - gsi = irq - gsi_end; + else if (irq < (gsi_top + NR_IRQS_LEGACY)) + gsi = irq - gsi_top; else gsi = 0xffffffff; diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 2e837f5080f..fb7a5f052e2 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -145,6 +145,15 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, percpu_entry->states[cx->index].eax = cx->address; percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; } + + /* + * For _CST FFH on Intel, if GAS.access_size bit 1 is cleared, + * then we should skip checking BM_STS for this C-state. + * ref: "Intel Processor Vendor-Specific ACPI Interface Specification" + */ + if ((c->x86_vendor == X86_VENDOR_INTEL) && !(reg->access_size & 0x2)) + cx->bm_sts_skip = 1; + return retval; } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b9..fcc3c61fdec 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -157,9 +157,14 @@ static int __init acpi_sleep_setup(char *str) #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); - if (strncmp(str, "s4_nonvs", 8) == 0) - acpi_s4_no_nvs(); + if (strncmp(str, "s4_nonvs", 8) == 0) { + pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, " + "please use acpi_sleep=nonvs instead"); + acpi_nvs_nosave(); + } #endif + if (strncmp(str, "nonvs", 5) == 0) + acpi_nvs_nosave(); if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); str = strchr(str, ','); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c02cc692985..a96489ee6ca 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -921,7 +921,7 @@ void disable_local_APIC(void) unsigned int value; /* APIC hasn't been mapped yet */ - if (!apic_phys) + if (!x2apic_mode && !apic_phys) return; clear_local_APIC(); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 33f3563a2a5..e41ed24ab26 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -89,8 +89,8 @@ int nr_ioapics; /* IO APIC gsi routing info */ struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; -/* The last gsi number used */ -u32 gsi_end; +/* The one past the highest gsi number used */ +u32 gsi_top; /* MP IRQ source entries */ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; @@ -1035,7 +1035,7 @@ static int pin_2_irq(int idx, int apic, int pin) if (gsi >= NR_IRQS_LEGACY) irq = gsi; else - irq = gsi_end + 1 + gsi; + irq = gsi_top + gsi; } #ifdef CONFIG_X86_32 @@ -3853,7 +3853,7 @@ void __init probe_nr_irqs_gsi(void) { int nr; - nr = gsi_end + 1 + NR_IRQS_LEGACY; + nr = gsi_top + NR_IRQS_LEGACY; if (nr > nr_irqs_gsi) nr_irqs_gsi = nr; @@ -4294,8 +4294,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) */ nr_ioapic_registers[idx] = entries; - if (mp_gsi_routing[idx].gsi_end > gsi_end) - gsi_end = mp_gsi_routing[idx].gsi_end; + if (mp_gsi_routing[idx].gsi_end >= gsi_top) + gsi_top = mp_gsi_routing[idx].gsi_end + 1; printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 611df11ba15..c2897b7b4a3 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -102,8 +102,8 @@ static const u64 amd_perfmon_event_map[] = [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, }; static u64 amd_pmu_event_map(int hw_event) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index fdbc652d3fe..214ac860ebe 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -72,6 +72,7 @@ static struct event_constraint intel_westmere_event_constraints[] = INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ + INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */ EVENT_CONSTRAINT_END }; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7bca3c6a02f..0d6fc71bedb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -729,7 +729,7 @@ static int __init e820_mark_nvs_memory(void) struct e820entry *ei = &e820.map[i]; if (ei->type == E820_NVS) - hibernate_nvs_register(ei->addr, ei->size); + suspend_nvs_register(ei->addr, ei->size); } return 0; diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index ebdb85cf268..e5cc7e82e60 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -18,6 +18,7 @@ #include <asm/apic.h> #include <asm/iommu.h> #include <asm/gart.h> +#include <asm/hpet.h> static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -191,6 +192,21 @@ static void __init ati_bugs_contd(int num, int slot, int func) } #endif +/* + * Force the read back of the CMP register in hpet_next_event() + * to work around the problem that the CMP register write seems to be + * delayed. See hpet_next_event() for details. + * + * We do this on all SMBUS incarnations for now until we have more + * information about the affected chipsets. + */ +static void __init ati_hpet_bugs(int num, int slot, int func) +{ +#ifdef CONFIG_HPET_TIMER + hpet_readback_cmp = 1; +#endif +} + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -220,6 +236,8 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, + { PCI_VENDOR_ID_ATI, PCI_ANY_ID, + PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs }, {} }; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0697ff13983..4db7c4d12ff 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -571,8 +571,8 @@ auditsys: * masked off. */ sysret_audit: - movq %rax,%rsi /* second arg, syscall return value */ - cmpq $0,%rax /* is it < 0? */ + movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ + cmpq $0,%rsi /* is it < 0? */ setl %al /* 1 if so, 0 if not */ movzbl %al,%edi /* zero-extend that into %edi */ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 345a4b1fe14..675879b65ce 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -640,8 +640,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) /* Skip cs, ip, orig_ax and gs. */ \ " subl $16, %esp\n" \ " pushl %fs\n" \ - " pushl %ds\n" \ " pushl %es\n" \ + " pushl %ds\n" \ " pushl %eax\n" \ " pushl %ebp\n" \ " pushl %edi\n" \ diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5ae5d2426ed..d86dbf7e54b 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -123,7 +123,7 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", m->apicid, m->apicver, m->apicaddr); - mp_register_ioapic(m->apicid, m->apicaddr, gsi_end + 1); + mp_register_ioapic(m->apicid, m->apicaddr, gsi_top); } static void print_MP_intsrc_info(struct mpc_intsrc *m) diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index e796448f0eb..5915e0b3330 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c @@ -216,6 +216,12 @@ static void __init mrst_setup_boot_clock(void) setup_boot_APIC_clock(); }; +/* MID systems don't have i8042 controller */ +static int mrst_i8042_detect(void) +{ + return 0; +} + /* * Moorestown specific x86_init function overrides and early setup * calls. @@ -233,6 +239,7 @@ void __init x86_mrst_early_setup(void) x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock; x86_platform.calibrate_tsc = mrst_calibrate_tsc; + x86_platform.i8042_detect = mrst_i8042_detect; x86_init.pci.init = pci_mrst_init; x86_init.pci.fixup_irqs = x86_init_noop; diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index fb99f7edb34..078d4ec1a9d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -103,11 +103,16 @@ int use_calgary __read_mostly = 0; #define PMR_SOFTSTOPFAULT 0x40000000 #define PMR_HARDSTOP 0x20000000 -#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */ -#define MAX_NUM_CHASSIS 8 /* max number of chassis */ -/* MAX_PHB_BUS_NUM is the maximal possible dev->bus->number */ -#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2) -#define PHBS_PER_CALGARY 4 +/* + * The maximum PHB bus number. + * x3950M2 (rare): 8 chassis, 48 PHBs per chassis = 384 + * x3950M2: 4 chassis, 48 PHBs per chassis = 192 + * x3950 (PCIE): 8 chassis, 32 PHBs per chassis = 256 + * x3950 (PCIX): 8 chassis, 16 PHBs per chassis = 128 + */ +#define MAX_PHB_BUS_NUM 256 + +#define PHBS_PER_CALGARY 4 /* register offsets in Calgary's internal register space */ static const unsigned long tar_offsets[] = { @@ -1051,8 +1056,6 @@ static int __init calgary_init_one(struct pci_dev *dev) struct iommu_table *tbl; int ret; - BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); - bbar = busno_to_bbar(dev->bus->number); ret = calgary_setup_tar(dev, bbar); if (ret) diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index e72d3fc6547..939b9e98245 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -498,15 +498,10 @@ void force_hpet_resume(void) * See erratum #27 (Misinterpreted MSI Requests May Result in * Corrupted LPC DMA Data) in AMD Publication #46837, * "SB700 Family Product Errata", Rev. 1.0, March 2010. - * - * Also force the read back of the CMP register in hpet_next_event() - * to work around the problem that the CMP register write seems to be - * delayed. See hpet_next_event() for details. */ static void force_disable_hpet_msi(struct pci_dev *unused) { hpet_msi_disable = 1; - hpet_readback_cmp = 1; } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 8e1aac86b50..e3af342fe83 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -228,6 +228,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"), }, }, + { /* Handle problems with rebooting on Dell T7400's */ + .callback = set_bios_reboot, + .ident = "Dell Precision T7400", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T7400"), + }, + }, { /* Handle problems with rebooting on HP laptops */ .callback = set_bios_reboot, .ident = "HP Compaq Laptop", diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index de3b63ae3da..a60df9ae645 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -238,6 +238,15 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = early_per_cpu_map(x86_cpu_to_node_map, cpu); + /* + * Ensure that the boot cpu numa_node is correct when the boot + * cpu is on a node that doesn't have memory installed. + * Also cpu_up() will call cpu_to_node() for APs when + * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set + * up later with c_init aka intel_init/amd_init. + * So set them all (boot cpu and all APs). + */ + set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); #endif #endif /* @@ -257,14 +266,6 @@ void __init setup_per_cpu_areas(void) early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif -#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) - /* - * make sure boot cpu numa_node is right, when boot cpu is on the - * node that doesn't have mem installed - */ - set_cpu_numa_node(boot_cpu_id, early_cpu_to_node(boot_cpu_id)); -#endif - /* Setup node to cpumask map */ setup_node_to_cpumask_map(); diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c index 7ded57896c0..cb22acf3ed0 100644 --- a/arch/x86/kernel/sfi.c +++ b/arch/x86/kernel/sfi.c @@ -93,7 +93,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table) pentry = (struct sfi_apic_table_entry *)sb->pentry; for (i = 0; i < num; i++) { - mp_register_ioapic(i, pentry->phys_addr, gsi_end + 1); + mp_register_ioapic(i, pentry->phys_addr, gsi_top); pentry++; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 142d70c74b0..725ef4d17cd 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -526,6 +526,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; + int user_icebp = 0; unsigned long dr6; int si_code; @@ -534,6 +535,14 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Filter out all the reserved bits which are preset to 1 */ dr6 &= ~DR6_RESERVED; + /* + * If dr6 has no reason to give us about the origin of this trap, + * then it's very likely the result of an icebp/int01 trap. + * User wants a sigtrap for that. + */ + if (!dr6 && user_mode(regs)) + user_icebp = 1; + /* Catch kmemcheck conditions first of all! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; @@ -575,7 +584,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) regs->flags &= ~X86_EFLAGS_TF; } si_code = get_si_code(tsk->thread.debugreg6); - if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 61a1e8c7e19..cd6da6bf3ec 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -5,6 +5,7 @@ */ #include <linux/init.h> #include <linux/ioport.h> +#include <linux/module.h> #include <asm/bios_ebda.h> #include <asm/paravirt.h> @@ -85,6 +86,7 @@ struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { }; static void default_nmi_init(void) { }; +static int default_i8042_detect(void) { return 1; }; struct x86_platform_ops x86_platform = { .calibrate_tsc = native_calibrate_tsc, @@ -92,5 +94,8 @@ struct x86_platform_ops x86_platform = { .set_wallclock = mach_set_rtc_mmss, .iommu_shutdown = iommu_shutdown_noop, .is_untracked_pat_range = is_ISA_range, - .nmi_init = default_nmi_init + .nmi_init = default_nmi_init, + .i8042_detect = default_i8042_detect }; + +EXPORT_SYMBOL_GPL(x86_platform); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 81563e76e28..b1ed0a1a591 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1815,6 +1815,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= PT_WRITABLE_MASK; + if (!tdp_enabled && !(pte_access & ACC_WRITE_MASK)) + spte &= ~PT_USER_MASK; + /* * Optimization: for pte sync, if spte was writable the hash * lookup is unnecessary (and expensive). Write protection @@ -1870,10 +1873,14 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, child = page_header(pte & PT64_BASE_ADDR_MASK); mmu_page_remove_parent_pte(child, sptep); + __set_spte(sptep, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(vcpu->kvm); } else if (pfn != spte_to_pfn(*sptep)) { pgprintk("hfn old %lx new %lx\n", spte_to_pfn(*sptep), pfn); rmap_remove(vcpu->kvm, sptep); + __set_spte(sptep, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(vcpu->kvm); } else was_rmapped = 1; } @@ -2919,7 +2926,7 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) return kvm_mmu_zap_page(kvm, page) + 1; } -static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) +static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { struct kvm *kvm; struct kvm *kvm_freed = NULL; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 89d66ca4d87..2331bdc2b54 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -342,6 +342,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, /* advance table_gfn when emulating 1gb pages with 4k */ if (delta == 0) table_gfn += PT_INDEX(addr, level); + access &= gw->pte_access; } else { direct = 0; table_gfn = gw->table_gfn[level - 2]; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 96dc232bfc5..ce438e0fdd2 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -28,6 +28,7 @@ #include <linux/ftrace_event.h> #include <linux/slab.h> +#include <asm/tlbflush.h> #include <asm/desc.h> #include <asm/virtext.h> @@ -56,6 +57,8 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +static bool erratum_383_found __read_mostly; + static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, @@ -374,6 +377,31 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, svm->vmcb->control.event_inj_err = error_code; } +static void svm_init_erratum_383(void) +{ + u32 low, high; + int err; + u64 val; + + /* Only Fam10h is affected */ + if (boot_cpu_data.x86 != 0x10) + return; + + /* Use _safe variants to not break nested virtualization */ + val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err); + if (err) + return; + + val |= (1ULL << 47); + + low = lower_32_bits(val); + high = upper_32_bits(val); + + native_write_msr_safe(MSR_AMD64_DC_CFG, low, high); + + erratum_383_found = true; +} + static int has_svm(void) { const char *msg; @@ -429,6 +457,8 @@ static int svm_hardware_enable(void *garbage) wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); + svm_init_erratum_383(); + return 0; } @@ -1410,8 +1440,59 @@ static int nm_interception(struct vcpu_svm *svm) return 1; } -static int mc_interception(struct vcpu_svm *svm) +static bool is_erratum_383(void) { + int err, i; + u64 value; + + if (!erratum_383_found) + return false; + + value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err); + if (err) + return false; + + /* Bit 62 may or may not be set for this mce */ + value &= ~(1ULL << 62); + + if (value != 0xb600000000010015ULL) + return false; + + /* Clear MCi_STATUS registers */ + for (i = 0; i < 6; ++i) + native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0); + + value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err); + if (!err) { + u32 low, high; + + value &= ~(1ULL << 2); + low = lower_32_bits(value); + high = upper_32_bits(value); + + native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high); + } + + /* Flush tlb to evict multi-match entries */ + __flush_tlb_all(); + + return true; +} + +static void svm_handle_mce(struct vcpu_svm *svm) +{ + if (is_erratum_383()) { + /* + * Erratum 383 triggered. Guest state is corrupt so kill the + * guest. + */ + pr_err("KVM: Guest triggered AMD Erratum 383\n"); + + set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests); + + return; + } + /* * On an #MC intercept the MCE handler is not called automatically in * the host. So do it by hand here. @@ -1420,6 +1501,11 @@ static int mc_interception(struct vcpu_svm *svm) "int $0x12\n"); /* not sure if we ever come back to this point */ + return; +} + +static int mc_interception(struct vcpu_svm *svm) +{ return 1; } @@ -3088,6 +3174,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); } + + /* + * We need to handle MC intercepts here before the vcpu has a chance to + * change the physical cpu + */ + if (unlikely(svm->vmcb->control.exit_code == + SVM_EXIT_EXCP_BASE + MC_VECTOR)) + svm_handle_mce(svm); } #undef R diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 859a01a07db..ee03679efe7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1744,18 +1744,15 @@ static void enter_lmode(struct kvm_vcpu *vcpu) (guest_tr_ar & ~AR_TYPE_MASK) | AR_TYPE_BUSY_64_TSS); } - vcpu->arch.efer |= EFER_LMA; - vmx_set_efer(vcpu, vcpu->arch.efer); + vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); } static void exit_lmode(struct kvm_vcpu *vcpu) { - vcpu->arch.efer &= ~EFER_LMA; - vmcs_write32(VM_ENTRY_CONTROLS, vmcs_read32(VM_ENTRY_CONTROLS) & ~VM_ENTRY_IA32E_MODE); - vmx_set_efer(vcpu, vcpu->arch.efer); + vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); } #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 05d571f6f19..7fa89c39c64 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1562,7 +1562,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, r = -ENOMEM; size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; - entries = vmalloc(size); + entries = kmalloc(size, GFP_KERNEL); if (!entries) goto out; @@ -1581,7 +1581,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, r = n; out_free: - vfree(entries); + kfree(entries); out: return r; } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index acc15b23b74..64121a18b8c 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -302,7 +302,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return -EINVAL; } - new = kmalloc(sizeof(struct memtype), GFP_KERNEL); + new = kzalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) return -ENOMEM; diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index f537087bb74..8acaddd0fb2 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -34,8 +34,7 @@ * memtype_lock protects the rbtree. */ -static void memtype_rb_augment_cb(struct rb_node *node); -static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb); +static struct rb_root memtype_rbroot = RB_ROOT; static int is_node_overlap(struct memtype *node, u64 start, u64 end) { @@ -56,7 +55,7 @@ static u64 get_subtree_max_end(struct rb_node *node) } /* Update 'subtree_max_end' for a node, based on node and its children */ -static void update_node_max_end(struct rb_node *node) +static void memtype_rb_augment_cb(struct rb_node *node, void *__unused) { struct memtype *data; u64 max_end, child_max_end; @@ -78,25 +77,6 @@ static void update_node_max_end(struct rb_node *node) data->subtree_max_end = max_end; } -/* Update 'subtree_max_end' for a node and all its ancestors */ -static void update_path_max_end(struct rb_node *node) -{ - u64 old_max_end, new_max_end; - - while (node) { - struct memtype *data = container_of(node, struct memtype, rb); - - old_max_end = data->subtree_max_end; - update_node_max_end(node); - new_max_end = data->subtree_max_end; - - if (new_max_end == old_max_end) - break; - - node = rb_parent(node); - } -} - /* Find the first (lowest start addr) overlapping range from rb tree */ static struct memtype *memtype_rb_lowest_match(struct rb_root *root, u64 start, u64 end) @@ -190,12 +170,6 @@ failure: return -EBUSY; } -static void memtype_rb_augment_cb(struct rb_node *node) -{ - if (node) - update_path_max_end(node); -} - static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) { struct rb_node **node = &(root->rb_node); @@ -213,6 +187,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) rb_link_node(&newdata->rb, parent, node); rb_insert_color(&newdata->rb, root); + rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL); } int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) @@ -226,6 +201,7 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) if (ret_type) new->type = *ret_type; + new->subtree_max_end = new->end; memtype_rb_insert(&memtype_rbroot, new); } return err; @@ -233,13 +209,16 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) struct memtype *rbt_memtype_erase(u64 start, u64 end) { + struct rb_node *deepest; struct memtype *data; data = memtype_rb_exact_match(&memtype_rbroot, start, end); if (!data) goto out; + deepest = rb_augment_erase_begin(&data->rb); rb_erase(&data->rb, &memtype_rbroot); + rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL); out: return data; } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 97da2ba9344..55253095be8 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -96,6 +96,7 @@ EXPORT_SYMBOL(pcibios_align_resource); * the fact the PCI specs explicitly allow address decoders to be * shared between expansion ROMs and other resource regions, it's * at least dangerous) + * - bad resource sizes or overlaps with other regions * * Our solution: * (1) Allocate resources for all buses behind PCI-to-PCI bridges. @@ -136,6 +137,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) * child resource allocations in this * range. */ + r->start = r->end = 0; r->flags = 0; } } @@ -182,6 +184,7 @@ static void __init pcibios_allocate_resources(int pass) idx, r, disabled, pass); if (pci_claim_resource(dev, idx) < 0) { /* We'll assign a new address later */ + dev->fw_addr[idx] = r->start; r->end -= r->start; r->start = 0; } diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 7ef3a2735df..cb29191cee5 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c @@ -66,8 +66,9 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) devfn, pos, 4, &pcie_cap)) return 0; - if (pcie_cap == 0xffffffff) - return 0; + if (PCI_EXT_CAP_ID(pcie_cap) == 0x0000 || + PCI_EXT_CAP_ID(pcie_cap) == 0xffff) + break; if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) { raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, @@ -76,7 +77,7 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) return pos; } - pos = pcie_cap >> 20; + pos = PCI_EXT_CAP_NEXT(pcie_cap); } return 0; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 0a979f3e5b8..1290ba54b35 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -105,6 +105,8 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); #endif + ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE, + &ctxt->misc_enable); } /* Needed by apm.c */ @@ -152,6 +154,8 @@ static void fix_processor_context(void) */ static void __restore_processor_state(struct saved_context *ctxt) { + if (ctxt->misc_enable_saved) + wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable); /* * control registers */ |