diff options
Diffstat (limited to 'arch/x86/kernel')
35 files changed, 523 insertions, 204 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ec881c6bfee..b3a71137983 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -509,6 +509,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) return 0; } +EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) { diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d2fdb0826df..57ca7778722 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1086,7 +1086,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, dma_dom->aperture_size += APERTURE_RANGE_SIZE; - /* Intialize the exclusion range if necessary */ + /* Initialize the exclusion range if necessary */ for_each_iommu(iommu) { if (iommu->exclusion_start && iommu->exclusion_start >= dma_dom->aperture[index]->offset @@ -1353,7 +1353,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) /* * Allocates a new protection domain usable for the dma_ops functions. - * It also intializes the page table and the address allocator data + * It also initializes the page table and the address allocator data * structures required for the dma_ops interface */ static struct dma_ops_domain *dma_ops_domain_alloc(void) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index affacb5e006..0a99f7198bc 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -20,6 +20,13 @@ struct pci_device_id amd_nb_misc_ids[] = { }; EXPORT_SYMBOL(amd_nb_misc_ids); +const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = { + { 0x00, 0x18, 0x20 }, + { 0xff, 0x00, 0x20 }, + { 0xfe, 0x00, 0x20 }, + { } +}; + struct amd_northbridge_info amd_northbridges; EXPORT_SYMBOL(amd_northbridges); diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 7c9ab59653e..51ef31a89be 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -313,14 +313,16 @@ static void apbt_setup_irq(struct apbt_dev *adev) if (adev->irq == 0) return; + irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); + irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); + /* APB timer irqs are set up as mp_irqs, timer is edge type */ + __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); + if (system_state == SYSTEM_BOOTING) { - irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); - irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); - /* APB timer irqs are set up as mp_irqs, timer is edge type */ - __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); if (request_irq(adev->irq, apbt_interrupt_handler, - IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, - adev->name, adev)) { + IRQF_TIMER | IRQF_DISABLED | + IRQF_NOBALANCING, + adev->name, adev)) { printk(KERN_ERR "Failed request IRQ for APBT%d\n", adev->num); } diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index dcd7c83e165..5955a7800a9 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -39,18 +39,6 @@ int fallback_aper_force __initdata; int fix_aperture __initdata = 1; -struct bus_dev_range { - int bus; - int dev_base; - int dev_limit; -}; - -static struct bus_dev_range bus_dev_ranges[] __initdata = { - { 0x00, 0x18, 0x20}, - { 0xff, 0x00, 0x20}, - { 0xfe, 0x00, 0x20} -}; - static struct resource gart_resource = { .name = "GART", .flags = IORESOURCE_MEM, @@ -294,13 +282,13 @@ void __init early_gart_iommu_check(void) search_agp_bridge(&agp_aper_order, &valid_agp); fix = 0; - for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + for (i = 0; amd_nb_bus_dev_ranges[i].dev_limit; i++) { int bus; int dev_base, dev_limit; - bus = bus_dev_ranges[i].bus; - dev_base = bus_dev_ranges[i].dev_base; - dev_limit = bus_dev_ranges[i].dev_limit; + bus = amd_nb_bus_dev_ranges[i].bus; + dev_base = amd_nb_bus_dev_ranges[i].dev_base; + dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) @@ -349,13 +337,13 @@ void __init early_gart_iommu_check(void) return; /* disable them all at first */ - for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { int bus; int dev_base, dev_limit; - bus = bus_dev_ranges[i].bus; - dev_base = bus_dev_ranges[i].dev_base; - dev_limit = bus_dev_ranges[i].dev_limit; + bus = amd_nb_bus_dev_ranges[i].bus; + dev_base = amd_nb_bus_dev_ranges[i].dev_base; + dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) @@ -390,14 +378,14 @@ int __init gart_iommu_hole_init(void) fix = 0; node = 0; - for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { int bus; int dev_base, dev_limit; u32 ctl; - bus = bus_dev_ranges[i].bus; - dev_base = bus_dev_ranges[i].dev_base; - dev_limit = bus_dev_ranges[i].dev_limit; + bus = amd_nb_bus_dev_ranges[i].bus; + dev_base = amd_nb_bus_dev_ranges[i].dev_base; + dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) @@ -505,7 +493,7 @@ out: } /* Fix up the north bridges */ - for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { int bus, dev_base, dev_limit; /* @@ -514,9 +502,9 @@ out: */ u32 ctl = DISTLBWALKPRB | aper_order << 1; - bus = bus_dev_ranges[i].bus; - dev_base = bus_dev_ranges[i].dev_base; - dev_limit = bus_dev_ranges[i].dev_limit; + bus = amd_nb_bus_dev_ranges[i].bus; + dev_base = amd_nb_bus_dev_ranges[i].dev_base; + dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) continue; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 79e6baa8aa0..06c196d7e59 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -49,8 +49,8 @@ #include <asm/mtrr.h> #include <asm/smp.h> #include <asm/mce.h> -#include <asm/kvm_para.h> #include <asm/tsc.h> +#include <asm/hypervisor.h> unsigned int num_processors; @@ -684,7 +684,7 @@ static int __init calibrate_APIC_clock(void) lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, lapic_clockevent.shift); lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); lapic_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &lapic_clockevent); @@ -1476,7 +1476,8 @@ void __init enable_IR_x2apic(void) /* IR is required if there is APIC ID > 255 even when running * under KVM */ - if (max_physical_apicid > 255 || !kvm_para_available()) + if (max_physical_apicid > 255 || + !hypervisor_x2apic_available()) goto nox2apic; /* * without IR all CPUs can be addressed by IOAPIC/MSI diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 72ec29e1ae0..79fd43ca6f9 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -68,7 +68,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, switch (cmd) { case DIE_NMI: - case DIE_NMI_IPI: break; default: @@ -96,7 +95,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, static __read_mostly struct notifier_block backtrace_notifier = { .notifier_call = arch_trigger_all_cpu_backtrace_handler, .next = NULL, - .priority = 1 + .priority = NMI_LOCAL_LOW_PRIOR, }; static int __init register_trigger_all_cpu_backtrace(void) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index ecca5f41ad2..bd16b58b885 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -378,7 +378,7 @@ struct apic __refdata apic_x2apic_uv_x = { static __cpuinit void set_x2apic_extra_bits(int pnode) { - __this_cpu_write(x2apic_extra_bits, (pnode << 6)); + __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift); } /* @@ -641,7 +641,7 @@ void __cpuinit uv_cpu_init(void) */ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) { - if (reason != DIE_NMI_IPI) + if (reason != DIE_NMIUNKNOWN) return NOTIFY_OK; if (in_crash_kexec) diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index e7dbde7bfed..a7797197956 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -25,6 +25,7 @@ #include <linux/gfp.h> #include <asm/mce.h> #include <asm/apic.h> +#include <asm/nmi.h> /* Update fake mce registers on current CPU. */ static void inject_mce(struct mce *m) @@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) + if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) return NOTIFY_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) @@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self, static struct notifier_block mce_raise_nb = { .notifier_call = mce_raise_notify, - .priority = 1000, + .priority = NMI_LOCAL_NORMAL_PRIOR, }; /* Inject mce on current CPU */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 04921017abe..9d977a2ea69 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1267,7 +1267,6 @@ perf_event_nmi_handler(struct notifier_block *self, switch (cmd) { case DIE_NMI: - case DIE_NMI_IPI: break; case DIE_NMIUNKNOWN: this_nmi = percpu_read(irq_stat.__nmi_count); @@ -1317,7 +1316,7 @@ perf_event_nmi_handler(struct notifier_block *self, static __read_mostly struct notifier_block perf_event_nmi_notifier = { .notifier_call = perf_event_nmi_handler, .next = NULL, - .priority = 1 + .priority = NMI_LOCAL_LOW_PRIOR, }; static struct event_constraint unconstrained; diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 81400b93e69..e56b9bfbabd 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -753,19 +753,21 @@ out: static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) { - int overflow = 0; - u32 low, high; + u64 v; - rdmsr(hwc->config_base + hwc->idx, low, high); - - /* we need to check high bit for unflagged overflows */ - if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { - overflow = 1; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, - ((u64)low) & ~P4_CCCR_OVF); + /* an official way for overflow indication */ + rdmsrl(hwc->config_base + hwc->idx, v); + if (v & P4_CCCR_OVF) { + wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); + return 1; } - return overflow; + /* it might be unflagged overflow */ + rdmsrl(hwc->event_base + hwc->idx, v); + if (!(v & ARCH_P4_CNTRVAL_MASK)) + return 1; + + return 0; } static void p4_pmu_disable_pebs(void) @@ -1152,9 +1154,9 @@ static __initconst const struct x86_pmu p4_pmu = { */ .num_counters = ARCH_P4_MAX_CCCR, .apic = 1, - .cntval_bits = 40, - .cntval_mask = (1ULL << 40) - 1, - .max_period = (1ULL << 39) - 1, + .cntval_bits = ARCH_P4_CNTRVAL_BITS, + .cntval_mask = ARCH_P4_CNTRVAL_MASK, + .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, .hw_config = p4_hw_config, .schedule_events = p4_pmu_schedule_events, /* diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 8474c998cbd..df20723a6a1 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -197,14 +197,8 @@ void show_stack(struct task_struct *task, unsigned long *sp) */ void dump_stack(void) { - unsigned long bp = 0; unsigned long stack; -#ifdef CONFIG_FRAME_POINTER - if (!bp) - get_bp(bp); -#endif - printk("Pid: %d, comm: %.20s %s %s %.*s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, @@ -240,6 +234,7 @@ unsigned __kprobes long oops_begin(void) bust_spinlocks(1); return flags; } +EXPORT_SYMBOL_GPL(oops_begin); void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) { diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 0c2b7ef7a34..294f26da0c0 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -14,6 +14,7 @@ #include <linux/bootmem.h> #include <linux/pfn.h> #include <linux/suspend.h> +#include <linux/acpi.h> #include <linux/firmware-map.h> #include <linux/memblock.h> diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 591e6010427..c8b4efad7eb 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1406,6 +1406,16 @@ ENTRY(general_protection) CFI_ENDPROC END(general_protection) +#ifdef CONFIG_KVM_GUEST +ENTRY(async_page_fault) + RING0_EC_FRAME + pushl $do_async_page_fault + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +END(apf_page_fault) +#endif + /* * End of kprobes section */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e3ba417e869..aed1ffbeb0c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -299,17 +299,21 @@ ENDPROC(native_usergs_sysret64) ENTRY(save_args) XCPT_FRAME cld - movq_cfi rdi, RDI+16-ARGOFFSET - movq_cfi rsi, RSI+16-ARGOFFSET - movq_cfi rdx, RDX+16-ARGOFFSET - movq_cfi rcx, RCX+16-ARGOFFSET - movq_cfi rax, RAX+16-ARGOFFSET - movq_cfi r8, R8+16-ARGOFFSET - movq_cfi r9, R9+16-ARGOFFSET - movq_cfi r10, R10+16-ARGOFFSET - movq_cfi r11, R11+16-ARGOFFSET - - leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ + /* + * start from rbp in pt_regs and jump over + * return address. + */ + movq_cfi rdi, RDI+8-RBP + movq_cfi rsi, RSI+8-RBP + movq_cfi rdx, RDX+8-RBP + movq_cfi rcx, RCX+8-RBP + movq_cfi rax, RAX+8-RBP + movq_cfi r8, R8+8-RBP + movq_cfi r9, R9+8-RBP + movq_cfi r10, R10+8-RBP + movq_cfi r11, R11+8-RBP + + leaq -RBP+8(%rsp),%rdi /* arg1 for handler */ movq_cfi rbp, 8 /* push %rbp */ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ testl $3, CS(%rdi) @@ -782,8 +786,9 @@ END(interrupt) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func - subq $ORIG_RAX-ARGOFFSET+8, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8 + /* reserve pt_regs for scratch regs and rbp */ + subq $ORIG_RAX-RBP, %rsp + CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP call save_args PARTIAL_FRAME 0 call \func @@ -808,9 +813,14 @@ ret_from_intr: TRACE_IRQS_OFF decl PER_CPU_VAR(irq_count) leaveq + CFI_RESTORE rbp CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 + + /* we did not save rbx, restore only from ARGOFFSET */ + addq $8, %rsp + CFI_ADJUST_CFA_OFFSET -8 exit_intr: GET_THREAD_INFO(%rcx) testl $3,CS-ARGOFFSET(%rsp) @@ -1319,6 +1329,9 @@ errorentry xen_stack_segment do_stack_segment #endif errorentry general_protection do_general_protection errorentry page_fault do_page_fault +#ifdef CONFIG_KVM_GUEST +errorentry async_page_fault do_async_page_fault +#endif #ifdef CONFIG_X86_MCE paranoidzeroentry machine_check *machine_check_vector(%rip) #endif diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 9f54b209c37..fc293dc8dc3 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -126,7 +126,7 @@ ENTRY(startup_32) movsl movl pa(boot_params) + NEW_CL_POINTER,%esi andl %esi,%esi - jz 1f # No comand line + jz 1f # No command line movl $pa(boot_command_line),%edi movl $(COMMAND_LINE_SIZE/4),%ecx rep diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 58bb239a2fd..e60c38cc0ee 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -169,6 +169,7 @@ int init_fpu(struct task_struct *tsk) set_stopped_child_used_math(tsk); return 0; } +EXPORT_SYMBOL_GPL(init_fpu); /* * The xstateregs_active() routine is the same as the fpregs_active() routine, diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3a43caa3beb..52945da52a9 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -4,6 +4,7 @@ #include <linux/cpu.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> +#include <linux/of.h> #include <linux/seq_file.h> #include <linux/smp.h> #include <linux/ftrace.h> @@ -275,6 +276,15 @@ void smp_x86_platform_ipi(struct pt_regs *regs) EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); +#ifdef CONFIG_OF +unsigned int irq_create_of_mapping(struct device_node *controller, + const u32 *intspec, unsigned int intsize) +{ + return intspec[0]; +} +EXPORT_SYMBOL_GPL(irq_create_of_mapping); +#endif + #ifdef CONFIG_HOTPLUG_CPU /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 48ff6dcffa0..9974d21048f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -129,8 +129,7 @@ void __cpuinit irq_ctx_init(int cpu) irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), THREAD_FLAGS, THREAD_ORDER)); - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; + memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); irqctx->tinfo.cpu = cpu; irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); @@ -140,10 +139,8 @@ void __cpuinit irq_ctx_init(int cpu) irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), THREAD_FLAGS, THREAD_ORDER)); - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; + memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = 0; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); per_cpu(softirq_ctx, cpu) = irqctx; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index cd21b654dec..a4130005028 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -48,6 +48,7 @@ #include <asm/apicdef.h> #include <asm/system.h> #include <asm/apic.h> +#include <asm/nmi.h> struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -525,10 +526,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) } return NOTIFY_DONE; - case DIE_NMI_IPI: - /* Just ignore, we will handle the roundup on DIE_NMI. */ - return NOTIFY_DONE; - case DIE_NMIUNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; @@ -606,7 +603,7 @@ static struct notifier_block kgdb_notifier = { /* * Lowest-prio notifier priority, we want to be notified last: */ - .priority = -INT_MAX, + .priority = NMI_LOCAL_LOW_PRIOR, }; /** diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 63b0ec8d3d4..8dc44662394 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -27,16 +27,37 @@ #include <linux/mm.h> #include <linux/highmem.h> #include <linux/hardirq.h> +#include <linux/notifier.h> +#include <linux/reboot.h> +#include <linux/hash.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/kprobes.h> #include <asm/timer.h> +#include <asm/cpu.h> +#include <asm/traps.h> +#include <asm/desc.h> +#include <asm/tlbflush.h> #define MMU_QUEUE_SIZE 1024 +static int kvmapf = 1; + +static int parse_no_kvmapf(char *arg) +{ + kvmapf = 0; + return 0; +} + +early_param("no-kvmapf", parse_no_kvmapf); + struct kvm_para_state { u8 mmu_queue[MMU_QUEUE_SIZE]; int mmu_queue_len; }; static DEFINE_PER_CPU(struct kvm_para_state, para_state); +static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); static struct kvm_para_state *kvm_para_state(void) { @@ -50,6 +71,195 @@ static void kvm_io_delay(void) { } +#define KVM_TASK_SLEEP_HASHBITS 8 +#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS) + +struct kvm_task_sleep_node { + struct hlist_node link; + wait_queue_head_t wq; + u32 token; + int cpu; + bool halted; + struct mm_struct *mm; +}; + +static struct kvm_task_sleep_head { + spinlock_t lock; + struct hlist_head list; +} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; + +static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, + u32 token) +{ + struct hlist_node *p; + + hlist_for_each(p, &b->list) { + struct kvm_task_sleep_node *n = + hlist_entry(p, typeof(*n), link); + if (n->token == token) + return n; + } + + return NULL; +} + +void kvm_async_pf_task_wait(u32 token) +{ + u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); + struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; + struct kvm_task_sleep_node n, *e; + DEFINE_WAIT(wait); + int cpu, idle; + + cpu = get_cpu(); + idle = idle_cpu(cpu); + put_cpu(); + + spin_lock(&b->lock); + e = _find_apf_task(b, token); + if (e) { + /* dummy entry exist -> wake up was delivered ahead of PF */ + hlist_del(&e->link); + kfree(e); + spin_unlock(&b->lock); + return; + } + + n.token = token; + n.cpu = smp_processor_id(); + n.mm = current->active_mm; + n.halted = idle || preempt_count() > 1; + atomic_inc(&n.mm->mm_count); + init_waitqueue_head(&n.wq); + hlist_add_head(&n.link, &b->list); + spin_unlock(&b->lock); + + for (;;) { + if (!n.halted) + prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); + if (hlist_unhashed(&n.link)) + break; + + if (!n.halted) { + local_irq_enable(); + schedule(); + local_irq_disable(); + } else { + /* + * We cannot reschedule. So halt. + */ + native_safe_halt(); + local_irq_disable(); + } + } + if (!n.halted) + finish_wait(&n.wq, &wait); + + return; +} +EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); + +static void apf_task_wake_one(struct kvm_task_sleep_node *n) +{ + hlist_del_init(&n->link); + if (!n->mm) + return; + mmdrop(n->mm); + if (n->halted) + smp_send_reschedule(n->cpu); + else if (waitqueue_active(&n->wq)) + wake_up(&n->wq); +} + +static void apf_task_wake_all(void) +{ + int i; + + for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) { + struct hlist_node *p, *next; + struct kvm_task_sleep_head *b = &async_pf_sleepers[i]; + spin_lock(&b->lock); + hlist_for_each_safe(p, next, &b->list) { + struct kvm_task_sleep_node *n = + hlist_entry(p, typeof(*n), link); + if (n->cpu == smp_processor_id()) + apf_task_wake_one(n); + } + spin_unlock(&b->lock); + } +} + +void kvm_async_pf_task_wake(u32 token) +{ + u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); + struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; + struct kvm_task_sleep_node *n; + + if (token == ~0) { + apf_task_wake_all(); + return; + } + +again: + spin_lock(&b->lock); + n = _find_apf_task(b, token); + if (!n) { + /* + * async PF was not yet handled. + * Add dummy entry for the token. + */ + n = kmalloc(sizeof(*n), GFP_ATOMIC); + if (!n) { + /* + * Allocation failed! Busy wait while other cpu + * handles async PF. + */ + spin_unlock(&b->lock); + cpu_relax(); + goto again; + } + n->token = token; + n->cpu = smp_processor_id(); + n->mm = NULL; + init_waitqueue_head(&n->wq); + hlist_add_head(&n->link, &b->list); + } else + apf_task_wake_one(n); + spin_unlock(&b->lock); + return; +} +EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); + +u32 kvm_read_and_reset_pf_reason(void) +{ + u32 reason = 0; + + if (__get_cpu_var(apf_reason).enabled) { + reason = __get_cpu_var(apf_reason).reason; + __get_cpu_var(apf_reason).reason = 0; + } + + return reason; +} +EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); + +dotraplinkage void __kprobes +do_async_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + switch (kvm_read_and_reset_pf_reason()) { + default: + do_page_fault(regs, error_code); + break; + case KVM_PV_REASON_PAGE_NOT_PRESENT: + /* page is swapped out by the host. */ + kvm_async_pf_task_wait((u32)read_cr2()); + break; + case KVM_PV_REASON_PAGE_READY: + kvm_async_pf_task_wake((u32)read_cr2()); + break; + } +} + static void kvm_mmu_op(void *buffer, unsigned len) { int r; @@ -231,10 +441,117 @@ static void __init paravirt_ops_setup(void) #endif } +void __cpuinit kvm_guest_cpu_init(void) +{ + if (!kvm_para_available()) + return; + + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { + u64 pa = __pa(&__get_cpu_var(apf_reason)); + +#ifdef CONFIG_PREEMPT + pa |= KVM_ASYNC_PF_SEND_ALWAYS; +#endif + wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); + __get_cpu_var(apf_reason).enabled = 1; + printk(KERN_INFO"KVM setup async PF for cpu %d\n", + smp_processor_id()); + } +} + +static void kvm_pv_disable_apf(void *unused) +{ + if (!__get_cpu_var(apf_reason).enabled) + return; + + wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); + __get_cpu_var(apf_reason).enabled = 0; + + printk(KERN_INFO"Unregister pv shared memory for cpu %d\n", + smp_processor_id()); +} + +static int kvm_pv_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + if (code == SYS_RESTART) + on_each_cpu(kvm_pv_disable_apf, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_reboot_nb = { + .notifier_call = kvm_pv_reboot_notify, +}; + +#ifdef CONFIG_SMP +static void __init kvm_smp_prepare_boot_cpu(void) +{ +#ifdef CONFIG_KVM_CLOCK + WARN_ON(kvm_register_clock("primary cpu clock")); +#endif + kvm_guest_cpu_init(); + native_smp_prepare_boot_cpu(); +} + +static void kvm_guest_cpu_online(void *dummy) +{ + kvm_guest_cpu_init(); +} + +static void kvm_guest_cpu_offline(void *dummy) +{ + kvm_pv_disable_apf(NULL); + apf_task_wake_all(); +} + +static int __cpuinit kvm_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (unsigned long)hcpu; + switch (action) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: + case CPU_ONLINE_FROZEN: + smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0); + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata kvm_cpu_notifier = { + .notifier_call = kvm_cpu_notify, +}; +#endif + +static void __init kvm_apf_trap_init(void) +{ + set_intr_gate(14, &async_page_fault); +} + void __init kvm_guest_init(void) { + int i; + if (!kvm_para_available()) return; paravirt_ops_setup(); + register_reboot_notifier(&kvm_pv_reboot_nb); + for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) + spin_lock_init(&async_pf_sleepers[i].lock); + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) + x86_init.irqs.trap_init = kvm_apf_trap_init; + +#ifdef CONFIG_SMP + smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; + register_cpu_notifier(&kvm_cpu_notifier); +#else + kvm_guest_cpu_init(); +#endif } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index ca43ce31a19..f98d3eafe07 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -125,7 +125,7 @@ static struct clocksource kvm_clock = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static int kvm_register_clock(char *txt) +int kvm_register_clock(char *txt) { int cpu = smp_processor_id(); int low, high, ret; @@ -152,14 +152,6 @@ static void __cpuinit kvm_setup_secondary_clock(void) } #endif -#ifdef CONFIG_SMP -static void __init kvm_smp_prepare_boot_cpu(void) -{ - WARN_ON(kvm_register_clock("primary cpu clock")); - native_smp_prepare_boot_cpu(); -} -#endif - /* * After the clock is registered, the host will keep writing to the * registered memory location. If the guest happens to shutdown, this memory @@ -206,9 +198,6 @@ void __init kvmclock_init(void) x86_cpuinit.setup_percpu_clockev = kvm_setup_secondary_clock; #endif -#ifdef CONFIG_SMP - smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; -#endif machine_ops.shutdown = kvm_shutdown; #ifdef CONFIG_KEXEC machine_ops.crash_shutdown = kvm_crash_shutdown; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 8f295609173..ab23f1ad4bf 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -37,20 +37,11 @@ void *module_alloc(unsigned long size) { - struct vm_struct *area; - - if (!size) - return NULL; - size = PAGE_ALIGN(size); - if (size > MODULES_LEN) + if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; - - area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END); - if (!area) - return NULL; - - return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM, - PAGE_KERNEL_EXEC); + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, + -1, __builtin_return_address(0)); } /* Free memory returned from module_alloc */ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c5b250011fd..869e1aeeb71 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -421,8 +421,11 @@ struct pv_mmu_ops pv_mmu_ops = { .set_pte = native_set_pte, .set_pte_at = native_set_pte_at, .set_pmd = native_set_pmd, + .set_pmd_at = native_set_pmd_at, .pte_update = paravirt_nop, .pte_update_defer = paravirt_nop, + .pmd_update = paravirt_nop, + .pmd_update_defer = paravirt_nop, .ptep_modify_prot_start = __ptep_modify_prot_start, .ptep_modify_prot_commit = __ptep_modify_prot_commit, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 09c08a1c706..d8286ed54ff 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -22,11 +22,6 @@ #include <asm/i387.h> #include <asm/debugreg.h> -unsigned long idle_halt; -EXPORT_SYMBOL(idle_halt); -unsigned long idle_nomwait; -EXPORT_SYMBOL(idle_nomwait); - struct kmem_cache *task_xstate_cachep; EXPORT_SYMBOL_GPL(task_xstate_cachep); @@ -327,7 +322,7 @@ long sys_execve(const char __user *name, /* * Idle related variables and functions */ -unsigned long boot_option_idle_override = 0; +unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); /* @@ -386,6 +381,8 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; + trace_power_end(smp_processor_id()); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); /* loop is done by the caller */ @@ -443,8 +440,6 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { - trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); - trace_cpu_idle((ax>>4)+1, smp_processor_id()); if (!need_resched()) { if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -471,6 +466,8 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); + trace_power_end(smp_processor_id()); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } else local_irq_enable(); } @@ -503,7 +500,6 @@ static void poll_idle(void) * * idle=mwait overrides this decision and forces the usage of mwait. */ -static int __cpuinitdata force_mwait; #define MWAIT_INFO 0x05 #define MWAIT_ECX_EXTENDED_INFO 0x01 @@ -513,7 +509,7 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - if (force_mwait) + if (boot_option_idle_override == IDLE_FORCE_MWAIT) return 1; if (c->cpuid_level < MWAIT_INFO) @@ -633,9 +629,10 @@ static int __init idle_setup(char *str) if (!strcmp(str, "poll")) { printk("using polling idle threads.\n"); pm_idle = poll_idle; - } else if (!strcmp(str, "mwait")) - force_mwait = 1; - else if (!strcmp(str, "halt")) { + boot_option_idle_override = IDLE_POLL; + } else if (!strcmp(str, "mwait")) { + boot_option_idle_override = IDLE_FORCE_MWAIT; + } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is * forced to be used for CPU idle. In such case CPU C2/C3 @@ -644,8 +641,7 @@ static int __init idle_setup(char *str) * the boot_option_idle_override. */ pm_idle = default_idle; - idle_halt = 1; - return 0; + boot_option_idle_override = IDLE_HALT; } else if (!strcmp(str, "nomwait")) { /* * If the boot option of "idle=nomwait" is added, @@ -653,12 +649,10 @@ static int __init idle_setup(char *str) * states. In such case it won't touch the variable * of boot_option_idle_override. */ - idle_nomwait = 1; - return 0; + boot_option_idle_override = IDLE_NOMWAIT; } else return -1; - boot_option_idle_override = 1; return 0; } early_param("idle", idle_setup); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4b9befa0e34..8d128783af4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -57,8 +57,6 @@ #include <asm/syscalls.h> #include <asm/debugreg.h> -#include <trace/events/power.h> - asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); /* @@ -113,8 +111,6 @@ void cpu_idle(void) stop_critical_timings(); pm_idle(); start_critical_timings(); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4c818a73839..bd387e8f73b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,8 +51,6 @@ #include <asm/syscalls.h> #include <asm/debugreg.h> -#include <trace/events/power.h> - asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(unsigned long, old_rsp); @@ -141,10 +139,6 @@ void cpu_idle(void) pm_idle(); start_critical_timings(); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, - smp_processor_id()); - /* In many cases the interrupt that ended idle has already called exit_idle. But some idle loops can be woken up without interrupt. */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c495aa8d481..fc7aae1e2bc 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -18,6 +18,7 @@ #include <asm/pci_x86.h> #include <asm/virtext.h> #include <asm/cpu.h> +#include <asm/nmi.h> #ifdef CONFIG_X86_32 # include <linux/ctype.h> @@ -747,7 +748,7 @@ static int crash_nmi_callback(struct notifier_block *self, { int cpu; - if (val != DIE_NMI_IPI) + if (val != DIE_NMI) return NOTIFY_OK; cpu = raw_smp_processor_id(); @@ -778,6 +779,8 @@ static void smp_send_nmi_allbutself(void) static struct notifier_block crash_nmi_nb = { .notifier_call = crash_nmi_callback, + /* we want to be the first one called */ + .priority = NMI_LOCAL_HIGH_PRIOR+1, }; /* Halt all other CPUs, calling the specified function on each of them diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 1cfbbfc3ae2..6f39cab052d 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -76,7 +76,7 @@ int mach_set_rtc_mmss(unsigned long nowtime) CMOS_WRITE(real_seconds, RTC_SECONDS); CMOS_WRITE(real_minutes, RTC_MINUTES); } else { - printk(KERN_WARNING + printk_once(KERN_NOTICE "set_rtc_mmss: can't update from %d to %d\n", cmos_minutes, real_minutes); retval = -1; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c7149c96d07..763df77343d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -97,12 +97,12 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); */ static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); -void cpu_hotplug_driver_lock() +void cpu_hotplug_driver_lock(void) { mutex_lock(&x86_cpu_hotplug_driver_mutex); } -void cpu_hotplug_driver_unlock() +void cpu_hotplug_driver_unlock(void) { mutex_unlock(&x86_cpu_hotplug_driver_mutex); } diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index c2f1b26141e..998e972f3b1 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -133,7 +133,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, pmd = pmd_alloc(&tboot_mm, pud, vaddr); if (!pmd) return -1; - pte = pte_alloc_map(&tboot_mm, pmd, vaddr); + pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr); if (!pte) return -1; set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c76aaca5694..b9b67166f9d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -84,6 +84,11 @@ EXPORT_SYMBOL_GPL(used_vectors); static int ignore_nmis; int unknown_nmi_panic; +/* + * Prevent NMI reason port (0x61) being accessed simultaneously, can + * only be used in NMI handler. + */ +static DEFINE_RAW_SPINLOCK(nmi_reason_lock); static inline void conditional_sti(struct pt_regs *regs) { @@ -310,15 +315,15 @@ static int __init setup_unknown_nmi_panic(char *str) __setup("unknown_nmi_panic", setup_unknown_nmi_panic); static notrace __kprobes void -mem_parity_error(unsigned char reason, struct pt_regs *regs) +pci_serr_error(unsigned char reason, struct pt_regs *regs) { - printk(KERN_EMERG - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - printk(KERN_EMERG - "You have some hardware problem, likely on the PCI bus.\n"); + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + /* + * On some machines, PCI SERR line is used to report memory + * errors. EDAC makes use of it. + */ #if defined(CONFIG_EDAC) if (edac_handler_set()) { edac_atomic_assert_error(); @@ -329,11 +334,11 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + pr_emerg("Dazed and confused, but trying to continue\n"); - /* Clear and disable the memory parity error line. */ - reason = (reason & 0xf) | 4; - outb(reason, 0x61); + /* Clear and disable the PCI SERR error line. */ + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; + outb(reason, NMI_REASON_PORT); } static notrace __kprobes void @@ -341,15 +346,17 @@ io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + pr_emerg( + "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); show_registers(regs); if (panic_on_io_nmi) panic("NMI IOCK error: Not continuing"); /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & 0xf) | 8; - outb(reason, 0x61); + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); i = 20000; while (--i) { @@ -357,8 +364,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs) udelay(100); } - reason &= ~8; - outb(reason, 0x61); + reason &= ~NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); } static notrace __kprobes void @@ -377,57 +384,50 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) return; } #endif - printk(KERN_EMERG - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); + pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); - printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); + pr_emerg("Do you have a strange power saving mode enabled?\n"); if (unknown_nmi_panic || panic_on_unrecovered_nmi) panic("NMI: Not continuing"); - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + pr_emerg("Dazed and confused, but trying to continue\n"); } static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; - int cpu; - cpu = smp_processor_id(); - - /* Only the BSP gets external NMIs from the system. */ - if (!cpu) - reason = get_nmi_reason(); + /* + * CPU-specific NMI must be processed before non-CPU-specific + * NMI, otherwise we may lose it, because the CPU-specific + * NMI can not be detected/processed on other CPUs. + */ + if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; - if (!(reason & 0xc0)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; + /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ + raw_spin_lock(&nmi_reason_lock); + reason = get_nmi_reason(); -#ifdef CONFIG_X86_LOCAL_APIC - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; + if (reason & NMI_REASON_MASK) { + if (reason & NMI_REASON_SERR) + pci_serr_error(reason, regs); + else if (reason & NMI_REASON_IOCHK) + io_check_error(reason, regs); +#ifdef CONFIG_X86_32 + /* + * Reassert NMI in case it became active + * meanwhile as it's edge-triggered: + */ + reassert_nmi(); #endif - unknown_nmi_error(reason, regs); - + raw_spin_unlock(&nmi_reason_lock); return; } - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) - return; + raw_spin_unlock(&nmi_reason_lock); - /* AK: following checks seem to be broken on modern chipsets. FIXME */ - if (reason & 0x80) - mem_parity_error(reason, regs); - if (reason & 0x40) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active meanwhile - * as it's edge-triggered: - */ - reassert_nmi(); -#endif + unknown_nmi_error(reason, regs); } dotraplinkage notrace __kprobes void diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 03d2ea82f35..ffe5755caa8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -464,7 +464,7 @@ unsigned long native_calibrate_tsc(void) tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); /* hpet or pmtimer available ? */ - if (!hpet && !ref1 && !ref2) + if (ref1 == ref2) continue; /* Check, whether the sampling was disturbed by an SMI */ @@ -935,7 +935,7 @@ static void tsc_refine_calibration_work(struct work_struct *work) tsc_stop = tsc_read_refs(&ref_stop, hpet); /* hpet or pmtimer available ? */ - if (!hpet && !ref_start && !ref_stop) + if (ref_start == ref_stop) goto out; /* Check, whether the sampling was disturbed by an SMI */ @@ -965,7 +965,7 @@ out: static int __init init_tsc_clocksource(void) { - if (!cpu_has_tsc || tsc_disabled > 0) + if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz) return 0; if (tsc_clocksource_reliable) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 61fb9851962..863f8753ab0 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -179,6 +179,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) if (pud_none_or_clear_bad(pud)) goto out; pmd = pmd_offset(pud, 0xA0000); + split_huge_page_pmd(mm, pmd); if (pmd_none_or_clear_bad(pmd)) goto out; pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index bf470075518..b34ab80fddd 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -34,11 +34,9 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) #ifdef CONFIG_X86_32 OUTPUT_ARCH(i386) ENTRY(phys_startup_32) -jiffies = jiffies_64; #else OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) -jiffies_64 = jiffies; #endif #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) @@ -142,6 +140,15 @@ SECTIONS CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) DATA_DATA + /* + * Workaround a binutils (2.20.51.0.12 to 2.21.51.0.3) bug. + * This makes jiffies relocatable in such binutils + */ +#ifdef CONFIG_X86_32 + jiffies = jiffies_64; +#else + jiffies_64 = jiffies; +#endif CONSTRUCTORS /* rarely changed data like cpu maps */ |