diff options
Diffstat (limited to 'arch/x86')
40 files changed, 497 insertions, 231 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5b2196ab816..bc25b9f5e4c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -252,16 +252,13 @@ config SMP config X86_X2APIC bool "Support x2apic" - depends on X86_LOCAL_APIC && X86_64 + depends on X86_LOCAL_APIC && X86_64 && INTR_REMAP ---help--- This enables x2apic support on CPUs that have this feature. This allows 32-bit apic IDs (so it can support very large systems), and accesses the local apic via MSRs not via mmio. - ( On certain CPU models you may need to enable INTR_REMAP too, - to get functional x2apic mode. ) - If you don't know what to do here, say N. config SPARSE_IRQ @@ -1881,7 +1878,6 @@ config DMAR_FLOPPY_WA config INTR_REMAP bool "Support for Interrupt Remapping (EXPERIMENTAL)" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL - select X86_X2APIC ---help--- Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 95d86ce0421..9e0587a3776 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -129,22 +129,18 @@ u16 vga_crtc(void) return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4; } -static void vga_set_480_scanlines(int lines) +static void vga_set_480_scanlines(void) { u16 crtc; /* CRTC base address */ u8 csel; /* CRTC miscellaneous output register */ - u8 ovfw; /* CRTC overflow register */ - int end = lines-1; crtc = vga_crtc(); - ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40); - out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */ out_idx(0x0b, crtc, 0x06); /* Vertical total */ - out_idx(ovfw, crtc, 0x07); /* Vertical overflow */ + out_idx(0x3e, crtc, 0x07); /* Vertical overflow */ out_idx(0xea, crtc, 0x10); /* Vertical sync start */ - out_idx(end, crtc, 0x12); /* Vertical display end */ + out_idx(0xdf, crtc, 0x12); /* Vertical display end */ out_idx(0xe7, crtc, 0x15); /* Vertical blank start */ out_idx(0x04, crtc, 0x16); /* Vertical blank end */ csel = inb(0x3cc); @@ -153,21 +149,38 @@ static void vga_set_480_scanlines(int lines) outb(csel, 0x3c2); } +static void vga_set_vertical_end(int lines) +{ + u16 crtc; /* CRTC base address */ + u8 ovfw; /* CRTC overflow register */ + int end = lines-1; + + crtc = vga_crtc(); + + ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40); + + out_idx(ovfw, crtc, 0x07); /* Vertical overflow */ + out_idx(end, crtc, 0x12); /* Vertical display end */ +} + static void vga_set_80x30(void) { - vga_set_480_scanlines(30*16); + vga_set_480_scanlines(); + vga_set_vertical_end(30*16); } static void vga_set_80x34(void) { + vga_set_480_scanlines(); vga_set_14font(); - vga_set_480_scanlines(34*14); + vga_set_vertical_end(34*14); } static void vga_set_80x60(void) { + vga_set_480_scanlines(); vga_set_8font(); - vga_set_480_scanlines(60*8); + vga_set_vertical_end(60*8); } static int vga_set_mode(struct mode_info *mode) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index df8a300dfe6..42f2f837742 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -107,6 +107,9 @@ extern u32 native_safe_apic_wait_icr_idle(void); extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); +#define EIM_8BIT_APIC_ID 0 +#define EIM_32BIT_APIC_ID 1 + #ifdef CONFIG_X86_X2APIC /* * Make previous memory operations globally visible before diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h index 222802029fa..222802029fa 100755..100644 --- a/arch/x86/include/asm/cpu_debug.h +++ b/arch/x86/include/asm/cpu_debug.h diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0beba0d1468..bb83b1c397a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -154,6 +154,7 @@ * CPUID levels like 0x6, 0xA etc */ #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ /* Virtualization flags: Linux defined */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index cea7b74963e..f82fdc412c6 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -238,7 +238,7 @@ static inline unsigned long dma_alloc_coherent_mask(struct device *dev, dma_mask = dev->coherent_dma_mask; if (!dma_mask) - dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; + dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32); return dma_mask; } @@ -247,10 +247,10 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) { unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); - if (dma_mask <= DMA_24BIT_MASK) + if (dma_mask <= DMA_BIT_MASK(24)) gfp |= GFP_DMA; #ifdef CONFIG_X86_64 - if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) + if (dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) gfp |= GFP_DMA32; #endif return gfp; diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 81937a5dc77..2d81af3974a 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -151,11 +151,11 @@ extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); void native_set_fixmap(enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); + phys_addr_t phys, pgprot_t flags); #ifndef CONFIG_PARAVIRT static inline void __set_fixmap(enum fixed_addresses idx, - unsigned long phys, pgprot_t flags) + phys_addr_t phys, pgprot_t flags) { native_set_fixmap(idx, phys, flags); } diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index e5383e3d2f8..73739322b6d 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -193,8 +193,10 @@ extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); */ extern void early_ioremap_init(void); extern void early_ioremap_reset(void); -extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); -extern void __iomem *early_memremap(unsigned long offset, unsigned long size); +extern void __iomem *early_ioremap(resource_size_t phys_addr, + unsigned long size); +extern void __iomem *early_memremap(resource_size_t phys_addr, + unsigned long size); extern void early_iounmap(void __iomem *addr, unsigned long size); #define IO_SPACE_LIMIT 0xffff diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 373cc2bbcad..9d826e43601 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -162,10 +162,13 @@ extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); #ifdef CONFIG_X86_64 -extern int save_IO_APIC_setup(void); -extern void mask_IO_APIC_setup(void); -extern void restore_IO_APIC_setup(void); -extern void reinit_intr_remapped_IO_APIC(int); +extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); +extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); +extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); +extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); +extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); +extern void reinit_intr_remapped_IO_APIC(int intr_remapping, + struct IO_APIC_route_entry **ioapic_entries); #endif extern void probe_nr_irqs_gsi(void); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 7727aa8b7dd..378e3691c08 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -347,7 +347,7 @@ struct pv_mmu_ops { /* Sometimes the physical address is a pfn, and sometimes its an mfn. We can tell which is which from the index. */ void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, - unsigned long phys, pgprot_t flags); + phys_addr_t phys, pgprot_t flags); }; struct raw_spinlock; @@ -1432,7 +1432,7 @@ static inline void arch_leave_lazy_mmu_mode(void) void arch_flush_lazy_mmu_mode(void); static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, - unsigned long phys, pgprot_t flags) + phys_addr_t phys, pgprot_t flags) { pv_mmu_ops.set_fixmap(idx, phys, flags); } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 34c52370f2f..fcf4d92e7e0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -352,6 +352,11 @@ struct i387_soft_struct { u32 entry_eip; }; +struct ymmh_struct { + /* 16 * 16 bytes for each YMMH-reg = 256 bytes */ + u32 ymmh_space[64]; +}; + struct xsave_hdr_struct { u64 xstate_bv; u64 reserved1[2]; @@ -361,6 +366,7 @@ struct xsave_hdr_struct { struct xsave_struct { struct i387_fxsave_struct i387; struct xsave_hdr_struct xsave_hdr; + struct ymmh_struct ymmh; /* new processor state extensions will go here */ } __attribute__ ((packed, aligned (64))); diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index d5cd6c58688..a4737dddfd5 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -50,7 +50,7 @@ #ifdef CONFIG_X86_64 #define NEED_PSE 0 #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) -#define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) +#define NEED_PGE 0 #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index ec666491aaa..72e5a449166 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -269,6 +269,11 @@ struct _xsave_hdr { __u64 reserved2[5]; }; +struct _ymmh_state { + /* 16 * 16 bytes for each YMMH-reg */ + __u32 ymmh_space[64]; +}; + /* * Extended state pointed by the fpstate pointer in the sigcontext. * In addition to the fpstate, information encoded in the xstate_hdr @@ -278,6 +283,7 @@ struct _xsave_hdr { struct _xstate { struct _fpstate fpstate; struct _xsave_hdr xstate_hdr; + struct _ymmh_state ymmh; /* new processor state extensions go here */ }; diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 1a918dde46b..018a0a40079 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) +#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) +#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) static inline unsigned long pte_mfn(pte_t pte) diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 08e9a1ac07a..727acc15234 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -7,6 +7,7 @@ #define XSTATE_FP 0x1 #define XSTATE_SSE 0x2 +#define XSTATE_YMM 0x4 #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) @@ -15,7 +16,7 @@ /* * These are the features that the OS can handle currently. */ -#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE) +#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) #ifdef CONFIG_X86_64 #define REX_PREFIX "0x48, " diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 85eb8e10081..f2870920f24 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -431,6 +431,12 @@ static void __cpuinit setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); + if (cpu_has(¤t_cpu_data, X86_FEATURE_ARAT)) { + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; + /* Make LAPIC timer preferrable over percpu HPET */ + lapic_clockevent.rating = 150; + } + memcpy(levt, &lapic_clockevent, sizeof(*levt)); levt->cpumask = cpumask_of(smp_processor_id()); @@ -1304,6 +1310,7 @@ void __init enable_IR_x2apic(void) #ifdef CONFIG_INTR_REMAP int ret; unsigned long flags; + struct IO_APIC_route_entry **ioapic_entries = NULL; if (!cpu_has_x2apic) return; @@ -1334,17 +1341,23 @@ void __init enable_IR_x2apic(void) return; } - ret = save_IO_APIC_setup(); + ioapic_entries = alloc_ioapic_entries(); + if (!ioapic_entries) { + pr_info("Allocate ioapic_entries failed: %d\n", ret); + goto end; + } + + ret = save_IO_APIC_setup(ioapic_entries); if (ret) { pr_info("Saving IO-APIC state failed: %d\n", ret); goto end; } local_irq_save(flags); - mask_IO_APIC_setup(); + mask_IO_APIC_setup(ioapic_entries); mask_8259A(); - ret = enable_intr_remapping(1); + ret = enable_intr_remapping(EIM_32BIT_APIC_ID); if (ret && x2apic_preenabled) { local_irq_restore(flags); @@ -1364,9 +1377,9 @@ end_restore: /* * IR enabling failed */ - restore_IO_APIC_setup(); + restore_IO_APIC_setup(ioapic_entries); else - reinit_intr_remapped_IO_APIC(x2apic_preenabled); + reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries); unmask_8259A(); local_irq_restore(flags); @@ -1379,6 +1392,8 @@ end: pr_info("Enabled Interrupt-remapping\n"); } else pr_err("Failed to enable Interrupt-remapping and x2apic\n"); + if (ioapic_entries) + free_ioapic_entries(ioapic_entries); #else if (!cpu_has_x2apic) return; @@ -1954,6 +1969,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) local_irq_save(flags); disable_local_APIC(); +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + disable_intr_remapping(); +#endif local_irq_restore(flags); return 0; } @@ -1964,15 +1983,41 @@ static int lapic_resume(struct sys_device *dev) unsigned long flags; int maxlvt; +#ifdef CONFIG_INTR_REMAP + int ret; + struct IO_APIC_route_entry **ioapic_entries = NULL; + if (!apic_pm_state.active) return 0; - maxlvt = lapic_get_maxlvt(); - local_irq_save(flags); + if (x2apic) { + ioapic_entries = alloc_ioapic_entries(); + if (!ioapic_entries) { + WARN(1, "Alloc ioapic_entries in lapic resume failed."); + return -ENOMEM; + } + + ret = save_IO_APIC_setup(ioapic_entries); + if (ret) { + WARN(1, "Saving IO-APIC state failed: %d\n", ret); + free_ioapic_entries(ioapic_entries); + return ret; + } + + mask_IO_APIC_setup(ioapic_entries); + mask_8259A(); + enable_x2apic(); + } +#else + if (!apic_pm_state.active) + return 0; + local_irq_save(flags); if (x2apic) enable_x2apic(); +#endif + else { /* * Make sure the APICBASE points to the right address @@ -1986,6 +2031,7 @@ static int lapic_resume(struct sys_device *dev) wrmsr(MSR_IA32_APICBASE, l, h); } + maxlvt = lapic_get_maxlvt(); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); apic_write(APIC_DFR, apic_pm_state.apic_dfr); @@ -2009,8 +2055,20 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + reenable_intr_remapping(EIM_32BIT_APIC_ID); + + if (x2apic) { + unmask_8259A(); + restore_IO_APIC_setup(ioapic_entries); + free_ioapic_entries(ioapic_entries); + } +#endif + local_irq_restore(flags); + return 0; } @@ -2048,7 +2106,9 @@ static int __init init_lapic_sysfs(void) error = sysdev_register(&device_lapic); return error; } -device_initcall(init_lapic_sysfs); + +/* local apic needs to resume before other devices access its registers. */ +core_initcall(init_lapic_sysfs); #else /* CONFIG_PM */ diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 0014714ea97..306e5e88fb6 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -212,7 +212,7 @@ struct apic apic_flat = { .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = NULL, + .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, .write = native_apic_mem_write, @@ -362,7 +362,7 @@ struct apic apic_physflat = { .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = NULL, + .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, .write = native_apic_mem_write, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1bb5c6cee3e..a2789e42e16 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -851,63 +851,74 @@ __setup("pirq=", ioapic_pirq_setup); #endif /* CONFIG_X86_32 */ #ifdef CONFIG_INTR_REMAP -/* I/O APIC RTE contents at the OS boot up */ -static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; +struct IO_APIC_route_entry **alloc_ioapic_entries(void) +{ + int apic; + struct IO_APIC_route_entry **ioapic_entries; + + ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, + GFP_ATOMIC); + if (!ioapic_entries) + return 0; + + for (apic = 0; apic < nr_ioapics; apic++) { + ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_ATOMIC); + if (!ioapic_entries[apic]) + goto nomem; + } + + return ioapic_entries; + +nomem: + while (--apic >= 0) + kfree(ioapic_entries[apic]); + kfree(ioapic_entries); + + return 0; +} /* * Saves all the IO-APIC RTE's */ -int save_IO_APIC_setup(void) +int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) { - union IO_APIC_reg_01 reg_01; - unsigned long flags; int apic, pin; - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } + if (!ioapic_entries) + return -ENOMEM; for (apic = 0; apic < nr_ioapics; apic++) { - early_ioapic_entries[apic] = - kzalloc(sizeof(struct IO_APIC_route_entry) * - nr_ioapic_registers[apic], GFP_KERNEL); - if (!early_ioapic_entries[apic]) - goto nomem; - } + if (!ioapic_entries[apic]) + return -ENOMEM; - for (apic = 0; apic < nr_ioapics; apic++) for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - early_ioapic_entries[apic][pin] = + ioapic_entries[apic][pin] = ioapic_read_entry(apic, pin); + } return 0; - -nomem: - while (apic >= 0) - kfree(early_ioapic_entries[apic--]); - memset(early_ioapic_entries, 0, - ARRAY_SIZE(early_ioapic_entries)); - - return -ENOMEM; } -void mask_IO_APIC_setup(void) +/* + * Mask all IO APIC entries. + */ +void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) { int apic, pin; + if (!ioapic_entries) + return; + for (apic = 0; apic < nr_ioapics; apic++) { - if (!early_ioapic_entries[apic]) + if (!ioapic_entries[apic]) break; + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { struct IO_APIC_route_entry entry; - entry = early_ioapic_entries[apic][pin]; + entry = ioapic_entries[apic][pin]; if (!entry.mask) { entry.mask = 1; ioapic_write_entry(apic, pin, entry); @@ -916,22 +927,30 @@ void mask_IO_APIC_setup(void) } } -void restore_IO_APIC_setup(void) +/* + * Restore IO APIC entries which was saved in ioapic_entries. + */ +int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) { int apic, pin; + if (!ioapic_entries) + return -ENOMEM; + for (apic = 0; apic < nr_ioapics; apic++) { - if (!early_ioapic_entries[apic]) - break; + if (!ioapic_entries[apic]) + return -ENOMEM; + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ioapic_write_entry(apic, pin, - early_ioapic_entries[apic][pin]); - kfree(early_ioapic_entries[apic]); - early_ioapic_entries[apic] = NULL; + ioapic_entries[apic][pin]); } + return 0; } -void reinit_intr_remapped_IO_APIC(int intr_remapping) +void reinit_intr_remapped_IO_APIC(int intr_remapping, + struct IO_APIC_route_entry **ioapic_entries) + { /* * for now plain restore of previous settings. @@ -940,7 +959,17 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping) * table entries. for now, do a plain restore, and wait for * the setup_IO_APIC_irqs() to do proper initialization. */ - restore_IO_APIC_setup(); + restore_IO_APIC_setup(ioapic_entries); +} + +void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) +{ + int apic; + + for (apic = 0; apic < nr_ioapics; apic++) + kfree(ioapic_entries[apic]); + + kfree(ioapic_entries); } #endif @@ -2495,7 +2524,6 @@ static void irq_complete_move(struct irq_desc **descp) static inline void irq_complete_move(struct irq_desc **descp) {} #endif -#ifdef CONFIG_INTR_REMAP static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { int apic, pin; @@ -2529,6 +2557,7 @@ eoi_ioapic_irq(struct irq_desc *desc) spin_unlock_irqrestore(&ioapic_lock, flags); } +#ifdef CONFIG_X86_X2APIC static void ack_x2apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2540,7 +2569,6 @@ static void ack_x2apic_edge(unsigned int irq) { ack_x2APIC_irq(); } - #endif static void ack_apic_edge(unsigned int irq) @@ -2606,6 +2634,9 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); + if (irq_remapped(irq)) + eoi_ioapic_irq(desc); + /* Now we can move and renable the irq */ if (unlikely(do_unmask_irq)) { /* Only migrate the irq if the ack has been received. @@ -2651,6 +2682,26 @@ static void ack_apic_level(unsigned int irq) #endif } +#ifdef CONFIG_INTR_REMAP +static void ir_ack_apic_edge(unsigned int irq) +{ +#ifdef CONFIG_X86_X2APIC + if (x2apic_enabled()) + return ack_x2apic_edge(irq); +#endif + return ack_apic_edge(irq); +} + +static void ir_ack_apic_level(unsigned int irq) +{ +#ifdef CONFIG_X86_X2APIC + if (x2apic_enabled()) + return ack_x2apic_level(irq); +#endif + return ack_apic_level(irq); +} +#endif /* CONFIG_INTR_REMAP */ + static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .startup = startup_ioapic_irq, @@ -2670,8 +2721,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = { .mask = mask_IO_APIC_irq, .unmask = unmask_IO_APIC_irq, #ifdef CONFIG_INTR_REMAP - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, + .ack = ir_ack_apic_edge, + .eoi = ir_ack_apic_level, #ifdef CONFIG_SMP .set_affinity = set_ir_ioapic_affinity_irq, #endif @@ -3397,7 +3448,7 @@ static struct irq_chip msi_ir_chip = { .unmask = unmask_msi_irq, .mask = mask_msi_irq, #ifdef CONFIG_INTR_REMAP - .ack = ack_x2apic_edge, + .ack = ir_ack_apic_edge, #ifdef CONFIG_SMP .set_affinity = ir_set_msi_irq_affinity, #endif diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 8220ae69849..c965e521271 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, + { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, { 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 46e29ab96c6..46e29ab96c6 100755..100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 19f6b9d27e8..ecdb682ab51 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -68,6 +68,7 @@ struct acpi_cpufreq_data { unsigned int max_freq; unsigned int resume; unsigned int cpu_feature; + u64 saved_aperf, saved_mperf; }; static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); @@ -152,7 +153,8 @@ struct drv_cmd { u32 val; }; -static long do_drv_read(void *_cmd) +/* Called via smp_call_function_single(), on the target CPU */ +static void do_drv_read(void *_cmd) { struct drv_cmd *cmd = _cmd; u32 h; @@ -169,10 +171,10 @@ static long do_drv_read(void *_cmd) default: break; } - return 0; } -static long do_drv_write(void *_cmd) +/* Called via smp_call_function_many(), on the target CPUs */ +static void do_drv_write(void *_cmd) { struct drv_cmd *cmd = _cmd; u32 lo, hi; @@ -191,23 +193,24 @@ static long do_drv_write(void *_cmd) default: break; } - return 0; } static void drv_read(struct drv_cmd *cmd) { cmd->val = 0; - work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); + smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1); } static void drv_write(struct drv_cmd *cmd) { - unsigned int i; + int this_cpu; - for_each_cpu(i, cmd->mask) { - work_on_cpu(i, do_drv_write, cmd); - } + this_cpu = get_cpu(); + if (cpumask_test_cpu(this_cpu, cmd->mask)) + do_drv_write(cmd); + smp_call_function_many(cmd->mask, do_drv_write, cmd, 1); + put_cpu(); } static u32 get_cur_val(const struct cpumask *mask) @@ -241,28 +244,23 @@ static u32 get_cur_val(const struct cpumask *mask) return cmd.val; } -struct perf_cur { +struct perf_pair { union { struct { u32 lo; u32 hi; } split; u64 whole; - } aperf_cur, mperf_cur; + } aperf, mperf; }; - -static long read_measured_perf_ctrs(void *_cur) +/* Called via smp_call_function_single(), on the target CPU */ +static void read_measured_perf_ctrs(void *_cur) { - struct perf_cur *cur = _cur; + struct perf_pair *cur = _cur; - rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi); - rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi); - - wrmsr(MSR_IA32_APERF, 0, 0); - wrmsr(MSR_IA32_MPERF, 0, 0); - - return 0; + rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi); + rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi); } /* @@ -281,52 +279,57 @@ static long read_measured_perf_ctrs(void *_cur) static unsigned int get_measured_perf(struct cpufreq_policy *policy, unsigned int cpu) { - struct perf_cur cur; + struct perf_pair readin, cur; unsigned int perf_percent; unsigned int retval; - if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur)) + if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) return 0; + cur.aperf.whole = readin.aperf.whole - + per_cpu(drv_data, cpu)->saved_aperf; + cur.mperf.whole = readin.mperf.whole - + per_cpu(drv_data, cpu)->saved_mperf; + per_cpu(drv_data, cpu)->saved_aperf = readin.aperf.whole; + per_cpu(drv_data, cpu)->saved_mperf = readin.mperf.whole; + #ifdef __i386__ /* * We dont want to do 64 bit divide with 32 bit kernel * Get an approximate value. Return failure in case we cannot get * an approximate value. */ - if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) { + if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) { int shift_count; u32 h; - h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi); + h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi); shift_count = fls(h); - cur.aperf_cur.whole >>= shift_count; - cur.mperf_cur.whole >>= shift_count; + cur.aperf.whole >>= shift_count; + cur.mperf.whole >>= shift_count; } - if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) { + if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) { int shift_count = 7; - cur.aperf_cur.split.lo >>= shift_count; - cur.mperf_cur.split.lo >>= shift_count; + cur.aperf.split.lo >>= shift_count; + cur.mperf.split.lo >>= shift_count; } - if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo) - perf_percent = (cur.aperf_cur.split.lo * 100) / - cur.mperf_cur.split.lo; + if (cur.aperf.split.lo && cur.mperf.split.lo) + perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo; else perf_percent = 0; #else - if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) { + if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) { int shift_count = 7; - cur.aperf_cur.whole >>= shift_count; - cur.mperf_cur.whole >>= shift_count; + cur.aperf.whole >>= shift_count; + cur.mperf.whole >>= shift_count; } - if (cur.aperf_cur.whole && cur.mperf_cur.whole) - perf_percent = (cur.aperf_cur.whole * 100) / - cur.mperf_cur.whole; + if (cur.aperf.whole && cur.mperf.whole) + perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole; else perf_percent = 0; diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 0bd48e65a0c..ce2ed3e4aad 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -33,7 +33,6 @@ #include <linux/timex.h> #include <linux/io.h> #include <linux/acpi.h> -#include <linux/kernel.h> #include <asm/msr.h> #include <acpi/processor.h> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 61df7753212..18dfa30795c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -18,9 +18,10 @@ #include <linux/init.h> #include <linux/list.h> +#include <trace/syscall.h> + #include <asm/cacheflush.h> #include <asm/ftrace.h> -#include <linux/ftrace.h> #include <asm/nops.h> #include <asm/nmi.h> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3aaf7b9e3a8..c3fe010d74c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -65,7 +65,7 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, " Spurious interrupts\n"); #endif if (generic_interrupt_extension) { - seq_printf(p, "PLT: "); + seq_printf(p, "%*s: ", prec, "PLT"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->generic_irqs); seq_printf(p, " Platform interrupts\n"); diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index a0f3851ef31..2e0eb414095 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -108,40 +108,29 @@ struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE -struct update_for_cpu { - const void __user *buf; - size_t size; -}; - -static long update_for_cpu(void *_ufc) -{ - struct update_for_cpu *ufc = _ufc; - int error; - - error = microcode_ops->request_microcode_user(smp_processor_id(), - ufc->buf, ufc->size); - if (error < 0) - return error; - if (!error) - microcode_ops->apply_microcode(smp_processor_id()); - return error; -} - static int do_microcode_update(const void __user *buf, size_t size) { + cpumask_t old; int error = 0; int cpu; - struct update_for_cpu ufc = { .buf = buf, .size = size }; + + old = current->cpus_allowed; for_each_online_cpu(cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; if (!uci->valid) continue; - error = work_on_cpu(cpu, update_for_cpu, &ufc); + + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + error = microcode_ops->request_microcode_user(cpu, buf, size); if (error < 0) - break; + goto out; + if (!error) + microcode_ops->apply_microcode(cpu); } +out: + set_cpus_allowed_ptr(current, &old); return error; } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index dce99dca6cf..70fd7e414c1 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -679,7 +679,7 @@ void __init get_smp_config(void) __get_smp_config(0); } -static void smp_reserve_bootmem(struct mpf_intel *mpf) +static void __init smp_reserve_bootmem(struct mpf_intel *mpf) { unsigned long size = get_mpc_size(mpf->physptr); #ifdef CONFIG_X86_32 @@ -838,7 +838,7 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; -static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) +static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) { int i; @@ -866,7 +866,8 @@ static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) } } #else /* CONFIG_X86_IO_APIC */ -static inline void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} +static +inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} #endif /* CONFIG_X86_IO_APIC */ static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 90f5b9ef5de..745579bc825 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -40,7 +40,7 @@ EXPORT_SYMBOL(bad_dma_address); to older i386. */ struct device x86_dma_fallback_dev = { .init_name = "fallback device", - .coherent_dma_mask = DMA_32BIT_MASK, + .coherent_dma_mask = DMA_BIT_MASK(32), .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, }; EXPORT_SYMBOL(x86_dma_fallback_dev); @@ -148,7 +148,7 @@ again: if (!is_buffer_dma_capable(dma_mask, addr, size)) { __free_pages(page, get_order(size)); - if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { + if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { flag = (flag & ~GFP_DMA32) | GFP_DMA; goto again; } @@ -243,7 +243,7 @@ int dma_supported(struct device *dev, u64 mask) /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_24BIT_MASK) + if (mask < DMA_BIT_MASK(24)) return 0; /* Tell the device to use SAC when IOMMU force is on. This @@ -258,7 +258,7 @@ int dma_supported(struct device *dev, u64 mask) SAC for these. Assume all masks <= 40 bits are of this type. Normally this doesn't make any difference, but gives more gentle handling of IOMMU overflow. */ - if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { + if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) { dev_info(dev, "Force SAC with mask %Lx\n", mask); return 0; } diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index c6d703b3932..71d412a09f3 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -15,7 +15,7 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { - if (*hwdev->dma_mask >= DMA_32BIT_MASK) + if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) printk(KERN_ERR "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", name, (long long)bus, size, diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fe9345c967d..23b7c8f017e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,7 +21,6 @@ #include <linux/audit.h> #include <linux/seccomp.h> #include <linux/signal.h> -#include <linux/ftrace.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -35,6 +34,8 @@ #include <asm/proto.h> #include <asm/ds.h> +#include <trace/syscall.h> + #include "tls.h" enum x86_regset { diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2aef36d8aca..1340dad417f 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -224,6 +224,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), }, }, + { /* Handle problems with rebooting on Dell DXP061 */ + .callback = set_bios_reboot, + .ident = "Dell DXP061", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"), + }, + }, { } }; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 2b54fe002e9..0a5b04aa98f 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -324,7 +324,7 @@ void __ref xsave_cntxt_init(void) } /* - * for now OS knows only about FP/SSE + * Support only the state known to OS. */ pcntxt_mask = pcntxt_mask & XCNTXT_MASK; xsave_init(); diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index be54176e9eb..6340cef6798 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -219,6 +219,22 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +/** + * get_user_pages_fast() - pin user pages in memory + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @write: whether pages will be written to + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. + */ int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0dfa09d69e8..09daebfdb11 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -547,7 +547,7 @@ void __init early_ioremap_reset(void) } static void __init __early_set_fixmap(enum fixed_addresses idx, - unsigned long phys, pgprot_t flags) + phys_addr_t phys, pgprot_t flags) { unsigned long addr = __fix_to_virt(idx); pte_t *pte; @@ -566,7 +566,7 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, } static inline void __init early_set_fixmap(enum fixed_addresses idx, - unsigned long phys, pgprot_t prot) + phys_addr_t phys, pgprot_t prot) { if (after_paging_init) __set_fixmap(idx, phys, prot); @@ -607,9 +607,10 @@ static int __init check_early_ioremap_leak(void) late_initcall(check_early_ioremap_leak); static void __init __iomem * -__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) +__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) { - unsigned long offset, last_addr; + unsigned long offset; + resource_size_t last_addr; unsigned int nrpages; enum fixed_addresses idx0, idx; int i, slot; @@ -625,15 +626,15 @@ __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) } if (slot < 0) { - printk(KERN_INFO "early_iomap(%08lx, %08lx) not found slot\n", - phys_addr, size); + printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n", + (u64)phys_addr, size); WARN_ON(1); return NULL; } if (early_ioremap_debug) { - printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ", - phys_addr, size, slot); + printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ", + (u64)phys_addr, size, slot); dump_stack(); } @@ -680,13 +681,15 @@ __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) } /* Remap an IO device */ -void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size) +void __init __iomem * +early_ioremap(resource_size_t phys_addr, unsigned long size) { return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); } /* Remap memory */ -void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size) +void __init __iomem * +early_memremap(resource_size_t phys_addr, unsigned long size) { return __early_ioremap(phys_addr, size, PAGE_KERNEL); } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 640339ee4fb..c009a241d56 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -31,7 +31,7 @@ #ifdef CONFIG_X86_PAT int __read_mostly pat_enabled = 1; -void __cpuinit pat_disable(const char *reason) +static inline void pat_disable(const char *reason) { pat_enabled = 0; printk(KERN_INFO "%s\n", reason); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5b7c7c8464f..7aa03a5389f 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -345,7 +345,8 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) fixmaps_set++; } -void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) +void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, + pgprot_t flags) { __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 82cd39a6cbd..f09e8c36ee8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -42,6 +42,7 @@ #include <asm/xen/hypervisor.h> #include <asm/fixmap.h> #include <asm/processor.h> +#include <asm/proto.h> #include <asm/msr-index.h> #include <asm/setup.h> #include <asm/desc.h> @@ -168,21 +169,23 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } +static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; +static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; + static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { + unsigned maskecx = ~0; unsigned maskedx = ~0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. */ - if (*ax == 1) - maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ - (1 << X86_FEATURE_ACPI) | /* disable ACPI */ - (1 << X86_FEATURE_MCE) | /* disable MCE */ - (1 << X86_FEATURE_MCA) | /* disable MCA */ - (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + if (*ax == 1) { + maskecx = cpuid_leaf1_ecx_mask; + maskedx = cpuid_leaf1_edx_mask; + } asm(XEN_EMULATE_PREFIX "cpuid" : "=a" (*ax), @@ -190,9 +193,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, "=c" (*cx), "=d" (*dx) : "0" (*ax), "2" (*cx)); + + *cx &= maskecx; *dx &= maskedx; } +static __init void xen_init_cpuid_mask(void) +{ + unsigned int ax, bx, cx, dx; + + cpuid_leaf1_edx_mask = + ~((1 << X86_FEATURE_MCE) | /* disable MCE */ + (1 << X86_FEATURE_MCA) | /* disable MCA */ + (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + + if (!xen_initial_domain()) + cpuid_leaf1_edx_mask &= + ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ + (1 << X86_FEATURE_ACPI)); /* disable ACPI */ + + ax = 1; + xen_cpuid(&ax, &bx, &cx, &dx); + + /* cpuid claims we support xsave; try enabling it to see what happens */ + if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { + unsigned long cr4; + + set_in_cr4(X86_CR4_OSXSAVE); + + cr4 = read_cr4(); + + if ((cr4 & X86_CR4_OSXSAVE) == 0) + cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); + + clear_in_cr4(X86_CR4_OSXSAVE); + } +} + static void xen_set_debugreg(int reg, unsigned long val) { HYPERVISOR_set_debugreg(reg, val); @@ -284,12 +321,11 @@ static void xen_set_ldt(const void *addr, unsigned entries) static void xen_load_gdt(const struct desc_ptr *dtr) { - unsigned long *frames; unsigned long va = dtr->address; unsigned int size = dtr->size + 1; unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned long frames[pages]; int f; - struct multicall_space mcs; /* A GDT can be up to 64k in size, which corresponds to 8192 8-byte entries, or 16 4k pages.. */ @@ -297,19 +333,26 @@ static void xen_load_gdt(const struct desc_ptr *dtr) BUG_ON(size > 65536); BUG_ON(va & ~PAGE_MASK); - mcs = xen_mc_entry(sizeof(*frames) * pages); - frames = mcs.args; - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - frames[f] = arbitrary_virt_to_mfn((void *)va); + int level; + pte_t *ptep = lookup_address(va, &level); + unsigned long pfn, mfn; + void *virt; + + BUG_ON(ptep == NULL); + + pfn = pte_pfn(*ptep); + mfn = pfn_to_mfn(pfn); + virt = __va(PFN_PHYS(pfn)); + + frames[f] = mfn; make_lowmem_page_readonly((void *)va); - make_lowmem_page_readonly(mfn_to_virt(frames[f])); + make_lowmem_page_readonly(virt); } - MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); - - xen_mc_issue(PARAVIRT_LAZY_CPU); + if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) + BUG(); } static void load_TLS_descriptor(struct thread_struct *t, @@ -385,7 +428,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { - if (val->type != 0xf && val->type != 0xe) + if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) return 0; info->vector = vector; @@ -393,8 +436,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, info->cs = gate_segment(*val); info->flags = val->dpl; /* interrupt gates clear IF */ - if (val->type == 0xe) - info->flags |= 4; + if (val->type == GATE_INTERRUPT) + info->flags |= 1 << 2; return 1; } @@ -872,7 +915,6 @@ static const struct machine_ops __initdata xen_machine_ops = { .emergency_restart = xen_emergency_restart, }; - /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -897,6 +939,8 @@ asmlinkage void __init xen_start_kernel(void) xen_init_irq_ops(); + xen_init_cpuid_mask(); + #ifdef CONFIG_X86_LOCAL_APIC /* * set up the basic apic ops. @@ -938,6 +982,11 @@ asmlinkage void __init xen_start_kernel(void) if (!xen_initial_domain()) __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); +#ifdef CONFIG_X86_64 + /* Work out if we support NX */ + check_efer(); +#endif + /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index db3802fb7b8..9842b121240 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn) } /* Build the parallel p2m_top_mfn structures */ -void xen_setup_mfn_list_list(void) +static void __init xen_build_mfn_list_list(void) { unsigned pfn, idx; @@ -198,7 +198,10 @@ void xen_setup_mfn_list_list(void) unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); } +} +void xen_setup_mfn_list_list(void) +{ BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = @@ -218,6 +221,8 @@ void __init xen_build_dynamic_phys_to_machine(void) p2m_top[topidx] = &mfn_list[pfn]; } + + xen_build_mfn_list_list(); } unsigned long get_phys_to_machine(unsigned long pfn) @@ -233,47 +238,74 @@ unsigned long get_phys_to_machine(unsigned long pfn) } EXPORT_SYMBOL_GPL(get_phys_to_machine); -static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) +/* install a new p2m_top page */ +bool install_p2mtop_page(unsigned long pfn, unsigned long *p) { - unsigned long *p; + unsigned topidx = p2m_top_index(pfn); + unsigned long **pfnp, *mfnp; unsigned i; - p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); - BUG_ON(p == NULL); + pfnp = &p2m_top[topidx]; + mfnp = &p2m_top_mfn[topidx]; for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) p[i] = INVALID_P2M_ENTRY; - if (cmpxchg(pp, p2m_missing, p) != p2m_missing) - free_page((unsigned long)p); - else + if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) { *mfnp = virt_to_mfn(p); + return true; + } + + return false; } -void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +static void alloc_p2m(unsigned long pfn) { - unsigned topidx, idx; + unsigned long *p; - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return; - } + p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + BUG_ON(p == NULL); + + if (!install_p2mtop_page(pfn, p)) + free_page((unsigned long)p); +} + +/* Try to install p2m mapping; fail if intermediate bits missing */ +bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + unsigned topidx, idx; if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { BUG_ON(mfn != INVALID_P2M_ENTRY); - return; + return true; } topidx = p2m_top_index(pfn); if (p2m_top[topidx] == p2m_missing) { - /* no need to allocate a page to store an invalid entry */ if (mfn == INVALID_P2M_ENTRY) - return; - alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); + return true; + return false; } idx = p2m_index(pfn); p2m_top[topidx][idx] = mfn; + + return true; +} + +void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + + if (unlikely(!__set_phys_to_machine(pfn, mfn))) { + alloc_p2m(pfn); + + if (!__set_phys_to_machine(pfn, mfn)) + BUG(); + } } unsigned long arbitrary_virt_to_mfn(void *vaddr) @@ -987,7 +1019,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, return 0; } -void __init xen_mark_init_mm_pinned(void) +static void __init xen_mark_init_mm_pinned(void) { xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } @@ -1270,8 +1302,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, } *args; struct multicall_space mcs; - BUG_ON(cpumask_empty(cpus)); - BUG_ON(!mm); + if (cpumask_empty(cpus)) + return; /* nothing to do */ mcs = xen_mc_entry(sizeof(*args)); args = mcs.args; @@ -1438,6 +1470,15 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) } #endif +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +{ + struct mmuext_op op; + op.cmd = cmd; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) @@ -1446,22 +1487,29 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) BUG_ON(mem_map); /* should only be used early */ #endif make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); +} + +/* Used for pmd and pud */ +static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) +{ +#ifdef CONFIG_FLATMEM + BUG_ON(mem_map); /* should only be used early */ +#endif + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); } /* Early release_pte assumes that all pts are pinned, since there's only init_mm and anything attached to that is pinned. */ -static void xen_release_pte_init(unsigned long pfn) +static __init void xen_release_pte_init(unsigned long pfn) { + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +static __init void xen_release_pmd_init(unsigned long pfn) { - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } /* This needs to make sure the new pte page is pinned iff its being @@ -1750,7 +1798,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, } #endif /* CONFIG_X86_64 */ -static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) +static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) { pte_t pte; @@ -1773,6 +1821,9 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) #ifdef CONFIG_X86_LOCAL_APIC case FIX_APIC_BASE: /* maps dummy local APIC */ #endif + case FIX_TEXT_POKE0: + case FIX_TEXT_POKE1: + /* All local page mappings */ pte = pfn_pte(phys, prot); break; @@ -1819,7 +1870,6 @@ __init void xen_post_allocator_init(void) xen_mark_init_mm_pinned(); } - const struct pv_mmu_ops xen_mmu_ops __initdata = { .pagetable_setup_start = xen_pagetable_setup_start, .pagetable_setup_done = xen_pagetable_setup_done, @@ -1843,9 +1893,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .alloc_pte = xen_alloc_pte_init, .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pte_init, + .alloc_pmd = xen_alloc_pmd_init, .alloc_pmd_clone = paravirt_nop, - .release_pmd = xen_release_pte_init, + .release_pmd = xen_release_pmd_init, #ifdef CONFIG_HIGHPTE .kmap_atomic_pte = xen_kmap_atomic_pte, @@ -1883,8 +1933,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, - .alloc_pud = xen_alloc_pte_init, - .release_pud = xen_release_pte_init, + .alloc_pud = xen_alloc_pmd_init, + .release_pud = xen_release_pmd_init, #endif /* PAGETABLE_LEVELS == 4 */ .activate_mm = xen_activate_mm, diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 24d1b44a337..da730262489 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -11,6 +11,9 @@ enum pt_level { }; +bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); +bool install_p2mtop_page(unsigned long pfn, unsigned long *p); + void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 585a6e33083..429834ec168 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) BUG_ON(rc); while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { - HYPERVISOR_sched_op(SCHEDOP_yield, 0); + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); barrier(); } @@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) /* Make sure other vcpus get a chance to run if they need to. */ for_each_cpu(cpu, mask) { if (xen_vcpu_stolen(cpu)) { - HYPERVISOR_sched_op(SCHEDOP_yield, 0); + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); break; } } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 2f5ef2632ea..20139464943 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -57,8 +57,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); bool xen_vcpu_stolen(int vcpu); -void xen_mark_init_mm_pinned(void); - void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP |