summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig92
-rw-r--r--arch/x86/Makefile4
-rw-r--r--arch/x86/boot/Makefile4
-rw-r--r--arch/x86/boot/boot.h18
-rw-r--r--arch/x86/boot/cmdline.c12
-rw-r--r--arch/x86/boot/compressed/cmdline.c12
-rw-r--r--arch/x86/boot/compressed/eboot.c21
-rw-r--r--arch/x86/boot/compressed/head_32.S8
-rw-r--r--arch/x86/boot/compressed/head_64.S56
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/boot/compressed/misc.h1
-rw-r--r--arch/x86/boot/header.S47
-rw-r--r--arch/x86/boot/setup.ld2
-rw-r--r--arch/x86/boot/tools/build.c81
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c37
-rw-r--r--arch/x86/ia32/ia32_signal.c50
-rw-r--r--arch/x86/ia32/ia32entry.S16
-rw-r--r--arch/x86/ia32/sys_ia32.c171
-rw-r--r--arch/x86/include/asm/acpi.h4
-rw-r--r--arch/x86/include/asm/amd_nb.h17
-rw-r--r--arch/x86/include/asm/bootparam_utils.h38
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/efi.h1
-rw-r--r--arch/x86/include/asm/fpu-internal.h5
-rw-r--r--arch/x86/include/asm/ftrace.h1
-rw-r--r--arch/x86/include/asm/hpet.h5
-rw-r--r--arch/x86/include/asm/hw_irq.h13
-rw-r--r--arch/x86/include/asm/hypervisor.h13
-rw-r--r--arch/x86/include/asm/ia32.h15
-rw-r--r--arch/x86/include/asm/init.h28
-rw-r--r--arch/x86/include/asm/io_apic.h28
-rw-r--r--arch/x86/include/asm/irq_remapping.h40
-rw-r--r--arch/x86/include/asm/irq_vectors.h4
-rw-r--r--arch/x86/include/asm/kexec.h6
-rw-r--r--arch/x86/include/asm/kvm_host.h26
-rw-r--r--arch/x86/include/asm/kvm_para.h10
-rw-r--r--arch/x86/include/asm/linkage.h18
-rw-r--r--arch/x86/include/asm/mce.h84
-rw-r--r--arch/x86/include/asm/microcode.h14
-rw-r--r--arch/x86/include/asm/microcode_intel.h85
-rw-r--r--arch/x86/include/asm/mmzone_32.h6
-rw-r--r--arch/x86/include/asm/mshyperv.h4
-rw-r--r--arch/x86/include/asm/mwait.h3
-rw-r--r--arch/x86/include/asm/numa.h6
-rw-r--r--arch/x86/include/asm/numa_64.h6
-rw-r--r--arch/x86/include/asm/page.h7
-rw-r--r--arch/x86/include/asm/page_32.h1
-rw-r--r--arch/x86/include/asm/page_64.h36
-rw-r--r--arch/x86/include/asm/page_64_types.h22
-rw-r--r--arch/x86/include/asm/page_types.h2
-rw-r--r--arch/x86/include/asm/parport.h4
-rw-r--r--arch/x86/include/asm/pci.h3
-rw-r--r--arch/x86/include/asm/pci_x86.h7
-rw-r--r--arch/x86/include/asm/perf_event.h13
-rw-r--r--arch/x86/include/asm/pgtable.h34
-rw-r--r--arch/x86/include/asm/pgtable_32.h7
-rw-r--r--arch/x86/include/asm/pgtable_64.h8
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h4
-rw-r--r--arch/x86/include/asm/pgtable_types.h5
-rw-r--r--arch/x86/include/asm/processor.h29
-rw-r--r--arch/x86/include/asm/proto.h2
-rw-r--r--arch/x86/include/asm/realmode.h3
-rw-r--r--arch/x86/include/asm/required-features.h8
-rw-r--r--arch/x86/include/asm/signal.h22
-rw-r--r--arch/x86/include/asm/sys_ia32.h16
-rw-r--r--arch/x86/include/asm/syscalls.h13
-rw-r--r--arch/x86/include/asm/tlbflush.h18
-rw-r--r--arch/x86/include/asm/uaccess.h55
-rw-r--r--arch/x86/include/asm/unistd.h2
-rw-r--r--arch/x86/include/asm/uv/uv.h2
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h44
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h1496
-rw-r--r--arch/x86/include/asm/vmx.h18
-rw-r--r--arch/x86/include/asm/x86_init.h39
-rw-r--r--arch/x86/include/asm/xen/events.h3
-rw-r--r--arch/x86/include/asm/xen/page.h2
-rw-r--r--arch/x86/include/asm/xor.h491
-rw-r--r--arch/x86/include/asm/xor_32.h309
-rw-r--r--arch/x86/include/asm/xor_64.h305
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h63
-rw-r--r--arch/x86/include/uapi/asm/mce.h87
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h6
-rw-r--r--arch/x86/include/uapi/asm/signal.h8
-rw-r--r--arch/x86/include/uapi/asm/vmx.h9
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/acpi/boot.c5
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/amd_gart_64.c5
-rw-r--r--arch/x86/kernel/apb_timer.c10
-rw-r--r--arch/x86/kernel/apic/apic.c28
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c1
-rw-r--r--arch/x86/kernel/apic/io_apic.c457
-rw-r--r--arch/x86/kernel/apic/ipi.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c21
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c206
-rw-r--r--arch/x86/kernel/apm_32.c68
-rw-r--r--arch/x86/kernel/cpu/amd.c68
-rw-r--r--arch/x86/kernel/cpu/bugs.c27
-rw-r--r--arch/x86/kernel/cpu/common.c17
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c7
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c9
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c16
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c54
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c21
-rw-r--r--arch/x86/kernel/cpu/perf_event.h25
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c322
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c2
-rw-r--r--arch/x86/kernel/cpu/proc.c2
-rw-r--r--arch/x86/kernel/cpu/vmware.c13
-rw-r--r--arch/x86/kernel/dumpstack.c2
-rw-r--r--arch/x86/kernel/e820.c16
-rw-r--r--arch/x86/kernel/entry_32.S55
-rw-r--r--arch/x86/kernel/entry_64.S48
-rw-r--r--arch/x86/kernel/ftrace.c4
-rw-r--r--arch/x86/kernel/head32.c21
-rw-r--r--arch/x86/kernel/head64.c146
-rw-r--r--arch/x86/kernel/head_32.S113
-rw-r--r--arch/x86/kernel/head_64.S212
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c1
-rw-r--r--arch/x86/kernel/ioport.c3
-rw-r--r--arch/x86/kernel/kprobes/Makefile7
-rw-r--r--arch/x86/kernel/kprobes/common.h (renamed from arch/x86/kernel/kprobes-common.h)11
-rw-r--r--arch/x86/kernel/kprobes/core.c (renamed from arch/x86/kernel/kprobes.c)76
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c93
-rw-r--r--arch/x86/kernel/kprobes/opt.c (renamed from arch/x86/kernel/kprobes-opt.c)2
-rw-r--r--arch/x86/kernel/kvm.c24
-rw-r--r--arch/x86/kernel/kvmclock.c15
-rw-r--r--arch/x86/kernel/machine_kexec_64.c171
-rw-r--r--arch/x86/kernel/microcode_core.c7
-rw-r--r--arch/x86/kernel/microcode_core_early.c76
-rw-r--r--arch/x86/kernel/microcode_intel.c198
-rw-r--r--arch/x86/kernel/microcode_intel_early.c796
-rw-r--r--arch/x86/kernel/microcode_intel_lib.c174
-rw-r--r--arch/x86/kernel/msr.c3
-rw-r--r--arch/x86/kernel/pci-dma.c4
-rw-r--r--arch/x86/kernel/process.c122
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/quirks.c4
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/rtc.c1
-rw-r--r--arch/x86/kernel/setup.c377
-rw-r--r--arch/x86/kernel/signal.c184
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/step.c9
-rw-r--r--arch/x86/kernel/sys_x86_64.c2
-rw-r--r--arch/x86/kernel/traps.c9
-rw-r--r--arch/x86/kernel/tsc.c3
-rw-r--r--arch/x86/kernel/uprobes.c4
-rw-r--r--arch/x86/kernel/vm86_32.c8
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c3
-rw-r--r--arch/x86/kernel/x86_init.c28
-rw-r--r--arch/x86/kvm/emulate.c673
-rw-r--r--arch/x86/kvm/i8254.c1
-rw-r--r--arch/x86/kvm/i8259.c2
-rw-r--r--arch/x86/kvm/irq.c74
-rw-r--r--arch/x86/kvm/lapic.c140
-rw-r--r--arch/x86/kvm/lapic.h34
-rw-r--r--arch/x86/kvm/mmu.c168
-rw-r--r--arch/x86/kvm/mmutrace.h6
-rw-r--r--arch/x86/kvm/paging_tmpl.h106
-rw-r--r--arch/x86/kvm/svm.c24
-rw-r--r--arch/x86/kvm/vmx.c714
-rw-r--r--arch/x86/kvm/x86.c208
-rw-r--r--arch/x86/lguest/Kconfig1
-rw-r--r--arch/x86/lguest/boot.c3
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/lib/getuser.S43
-rw-r--r--arch/x86/mm/fault.c8
-rw-r--r--arch/x86/mm/init.c469
-rw-r--r--arch/x86/mm/init_32.c118
-rw-r--r--arch/x86/mm/init_64.c659
-rw-r--r--arch/x86/mm/memtest.c10
-rw-r--r--arch/x86/mm/mm_internal.h19
-rw-r--r--arch/x86/mm/numa.c43
-rw-r--r--arch/x86/mm/numa_32.c161
-rw-r--r--arch/x86/mm/numa_64.c13
-rw-r--r--arch/x86/mm/numa_internal.h6
-rw-r--r--arch/x86/mm/pageattr.c113
-rw-r--r--arch/x86/mm/pat.c4
-rw-r--r--arch/x86/mm/pgtable.c7
-rw-r--r--arch/x86/mm/physaddr.c60
-rw-r--r--arch/x86/mm/srat.c152
-rw-r--r--arch/x86/mm/tlb.c2
-rw-r--r--arch/x86/net/bpf_jit_comp.c40
-rw-r--r--arch/x86/pci/acpi.c11
-rw-r--r--arch/x86/pci/bus_numa.c4
-rw-r--r--arch/x86/pci/common.c29
-rw-r--r--arch/x86/pci/fixup.c30
-rw-r--r--arch/x86/pci/legacy.c4
-rw-r--r--arch/x86/pci/mmconfig-shared.c24
-rw-r--r--arch/x86/pci/mmconfig_32.c2
-rw-r--r--arch/x86/pci/mmconfig_64.c4
-rw-r--r--arch/x86/pci/mrst.c6
-rw-r--r--arch/x86/pci/numaq_32.c2
-rw-r--r--arch/x86/pci/pcbios.c4
-rw-r--r--arch/x86/platform/Makefile2
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c7
-rw-r--r--arch/x86/platform/efi/efi.c70
-rw-r--r--arch/x86/platform/efi/efi_64.c22
-rw-r--r--arch/x86/platform/goldfish/Makefile1
-rw-r--r--arch/x86/platform/goldfish/goldfish.c51
-rw-r--r--arch/x86/platform/mrst/mrst.c2
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-pm.c8
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-sci.c18
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c2
-rw-r--r--arch/x86/platform/scx200/scx200_32.c6
-rw-r--r--arch/x86/platform/sfi/sfi.c2
-rw-r--r--arch/x86/platform/ts5500/Makefile1
-rw-r--r--arch/x86/platform/ts5500/ts5500.c339
-rw-r--r--arch/x86/platform/uv/tlb_uv.c14
-rw-r--r--arch/x86/platform/uv/uv_time.c13
-rw-r--r--arch/x86/power/hibernate_32.c2
-rw-r--r--arch/x86/power/hibernate_64.c66
-rw-r--r--arch/x86/realmode/init.c49
-rw-r--r--arch/x86/syscalls/syscall_32.tbl22
-rw-r--r--arch/x86/syscalls/syscall_64.tbl6
-rw-r--r--arch/x86/tools/insn_sanity.c10
-rw-r--r--arch/x86/tools/relocs.c6
-rw-r--r--arch/x86/um/Kconfig6
-rw-r--r--arch/x86/um/Makefile4
-rw-r--r--arch/x86/um/fault.c2
-rw-r--r--arch/x86/um/shared/sysdep/syscalls_32.h5
-rw-r--r--arch/x86/um/signal.c15
-rw-r--r--arch/x86/um/sys_call_table_32.c4
-rw-r--r--arch/x86/um/syscalls_32.c38
-rw-r--r--arch/x86/vdso/vclock_gettime.c2
-rw-r--r--arch/x86/xen/enlighten.c78
-rw-r--r--arch/x86/xen/mmu.c72
-rw-r--r--arch/x86/xen/setup.c5
-rw-r--r--arch/x86/xen/smp.c49
-rw-r--r--arch/x86/xen/spinlock.c1
-rw-r--r--arch/x86/xen/suspend.c2
-rw-r--r--arch/x86/xen/xen-asm_32.S14
-rw-r--r--arch/x86/xen/xen-ops.h2
244 files changed, 8904 insertions, 5096 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 79795af5981..6a938337031 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1,7 +1,7 @@
# Select 32 or 64 bit
config 64BIT
bool "64-bit kernel" if ARCH = "x86"
- default ARCH = "x86_64"
+ default ARCH != "i386"
---help---
Say yes to build a 64-bit kernel - formerly known as x86_64
Say no to build a 32-bit kernel - formerly known as i386
@@ -28,7 +28,6 @@ config X86
select HAVE_OPROFILE
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
- select HAVE_IRQ_WORK
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select HAVE_MEMBLOCK
@@ -40,10 +39,12 @@ config X86
select HAVE_DMA_CONTIGUOUS if !SWIOTLB
select HAVE_KRETPROBES
select HAVE_OPTPROBES
+ select HAVE_KPROBES_ON_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FENTRY if X86_64
select HAVE_C_RECORDMCOUNT
select HAVE_DYNAMIC_FTRACE
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FP_TEST
@@ -106,6 +107,7 @@ config X86
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
select GENERIC_TIME_VSYSCALL if X86_64
select KTIME_SCALAR if X86_32
+ select ALWAYS_USE_PERSISTENT_CLOCK
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select HAVE_CONTEXT_TRACKING if X86_64
@@ -113,7 +115,10 @@ config X86
select MODULES_USE_ELF_REL if X86_32
select MODULES_USE_ELF_RELA if X86_64
select CLONE_BACKWARDS if X86_32
- select GENERIC_SIGALTSTACK
+ select ARCH_USE_BUILTIN_BSWAP
+ select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
+ select OLD_SIGACTION if X86_32
+ select COMPAT_OLD_SIGACTION if IA32_EMULATION
config INSTRUCTION_DECODER
def_bool y
@@ -222,7 +227,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
config HAVE_INTEL_TXT
def_bool y
- depends on EXPERIMENTAL && INTEL_IOMMU && ACPI
+ depends on INTEL_IOMMU && ACPI
config X86_32_SMP
def_bool y
@@ -320,6 +325,10 @@ config X86_BIGSMP
---help---
This option is needed for the systems that have more than 8 CPUs
+config GOLDFISH
+ def_bool y
+ depends on X86_GOLDFISH
+
if X86_32
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
@@ -402,6 +411,14 @@ config X86_UV
# Following is an alphabetically sorted list of 32 bit extended platforms
# Please maintain the alphabetic order if and when there are additions
+config X86_GOLDFISH
+ bool "Goldfish (Virtual Platform)"
+ depends on X86_32
+ ---help---
+ Enable support for the Goldfish virtual platform used primarily
+ for Android development. Unless you are building for the Android
+ Goldfish emulator say N here.
+
config X86_INTEL_CE
bool "CE4100 TV platform"
depends on PCI
@@ -454,6 +471,16 @@ config X86_MDFLD
endif
+config X86_INTEL_LPSS
+ bool "Intel Low Power Subsystem Support"
+ depends on ACPI
+ select COMMON_CLK
+ ---help---
+ Select to build support for Intel Low Power Subsystem such as
+ found on Intel Lynxpoint PCH. Selecting this option enables
+ things like clock tree (common clock framework) which are needed
+ by the LPSS peripheral drivers.
+
config X86_RDC321X
bool "RDC R-321x SoC"
depends on X86_32
@@ -617,7 +644,7 @@ config PARAVIRT
config PARAVIRT_SPINLOCKS
bool "Paravirtualization layer for spinlocks"
- depends on PARAVIRT && SMP && EXPERIMENTAL
+ depends on PARAVIRT && SMP
---help---
Paravirtualized spinlocks allow a pvops backend to replace the
spinlock implementation with something virtualization-friendly
@@ -729,7 +756,7 @@ config GART_IOMMU
config CALGARY_IOMMU
bool "IBM Calgary IOMMU support"
select SWIOTLB
- depends on X86_64 && PCI && EXPERIMENTAL
+ depends on X86_64 && PCI
---help---
Support for hardware IOMMUs in IBM's xSeries x366 and x460
systems. Needed to run systems with more than 3GB of memory
@@ -771,7 +798,7 @@ config IOMMU_HELPER
config MAXSMP
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
- depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
+ depends on X86_64 && SMP && DEBUG_KERNEL
select CPUMASK_OFFSTACK
---help---
Enable maximum number of CPUS and NUMA Nodes for this architecture.
@@ -1029,6 +1056,24 @@ config MICROCODE_OLD_INTERFACE
def_bool y
depends on MICROCODE
+config MICROCODE_INTEL_LIB
+ def_bool y
+ depends on MICROCODE_INTEL
+
+config MICROCODE_INTEL_EARLY
+ bool "Early load microcode"
+ depends on MICROCODE_INTEL && BLK_DEV_INITRD
+ default y
+ help
+ This option provides functionality to read additional microcode data
+ at the beginning of initrd image. The data tells kernel to load
+ microcode to CPU's as early as possible. No functional change if no
+ microcode data is glued to the initrd, therefore it's safe to say Y.
+
+config MICROCODE_EARLY
+ def_bool y
+ depends on MICROCODE_INTEL_EARLY
+
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
---help---
@@ -1107,7 +1152,6 @@ config HIGHMEM64G
endchoice
choice
- depends on EXPERIMENTAL
prompt "Memory split" if EXPERT
default VMSPLIT_3G
depends on X86_32
@@ -1184,7 +1228,7 @@ config DIRECT_GBPAGES
config NUMA
bool "Numa Memory Allocation and Scheduler Support"
depends on SMP
- depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
+ depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI))
default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
---help---
Enable NUMA (Non Uniform Memory Access) support.
@@ -1253,10 +1297,6 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
-config HAVE_ARCH_ALLOC_REMAP
- def_bool y
- depends on X86_32 && NUMA
-
config ARCH_HAVE_MEMORY_PRESENT
def_bool y
depends on X86_32 && DISCONTIGMEM
@@ -1279,7 +1319,7 @@ config ARCH_DISCONTIGMEM_DEFAULT
config ARCH_SPARSEMEM_ENABLE
def_bool y
- depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD
+ depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD
select SPARSEMEM_STATIC if X86_32
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
@@ -1593,8 +1633,7 @@ config CRASH_DUMP
For more details see Documentation/kdump/kdump.txt
config KEXEC_JUMP
- bool "kexec jump (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ bool "kexec jump"
depends on KEXEC && HIBERNATION
---help---
Jump between original kernel and kexeced kernel and invoke
@@ -1699,7 +1738,7 @@ config HOTPLUG_CPU
config BOOTPARAM_HOTPLUG_CPU0
bool "Set default setting of cpu0_hotpluggable"
default n
- depends on HOTPLUG_CPU && EXPERIMENTAL
+ depends on HOTPLUG_CPU
---help---
Set whether default state of cpu0_hotpluggable is on or off.
@@ -1728,7 +1767,7 @@ config BOOTPARAM_HOTPLUG_CPU0
config DEBUG_HOTPLUG_CPU0
def_bool n
prompt "Debug CPU0 hotplug"
- depends on HOTPLUG_CPU && EXPERIMENTAL
+ depends on HOTPLUG_CPU
---help---
Enabling this option offlines CPU0 (if CPU0 can be offlined) as
soon as possible and boots up userspace with CPU0 offlined. User
@@ -1912,6 +1951,7 @@ config APM_DO_ENABLE
this feature.
config APM_CPU_IDLE
+ depends on CPU_IDLE
bool "Make CPU Idle calls when idle"
---help---
Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
@@ -2037,7 +2077,7 @@ config PCI_MMCONFIG
config PCI_CNB20LE_QUIRK
bool "Read CNB20LE Host Bridge Windows" if EXPERT
- depends on PCI && EXPERIMENTAL
+ depends on PCI
help
Read the PCI windows out of the CNB20LE host bridge. This allows
PCI hotplug to work on systems with the CNB20LE chipset which do
@@ -2138,6 +2178,7 @@ config OLPC_XO1_RTC
config OLPC_XO1_SCI
bool "OLPC XO-1 SCI extras"
depends on OLPC && OLPC_XO1_PM
+ depends on INPUT=y
select POWER_SUPPLY
select GPIO_CS5535
select MFD_CORE
@@ -2187,6 +2228,15 @@ config GEOS
---help---
This option enables system support for the Traverse Technologies GEOS.
+config TS5500
+ bool "Technologic Systems TS-5500 platform support"
+ depends on MELAN
+ select CHECK_SIGNATURE
+ select NEW_LEDS
+ select LEDS_CLASS
+ ---help---
+ This option enables system support for the Technologic Systems TS-5500.
+
endif # X86_32
config AMD_NB
@@ -2231,8 +2281,8 @@ config IA32_AOUT
Support old a.out binaries in the 32bit emulation.
config X86_X32
- bool "x32 ABI for 64-bit mode (EXPERIMENTAL)"
- depends on X86_64 && IA32_EMULATION && EXPERIMENTAL
+ bool "x32 ABI for 64-bit mode"
+ depends on X86_64 && IA32_EMULATION
---help---
Include code to run binaries for the x32 native 32-bit ABI
for 64-bit processors. An x32 process gets access to the
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index e71fc4279aa..5c477260294 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -2,7 +2,11 @@
# select defconfig based on actual architecture
ifeq ($(ARCH),x86)
+ ifeq ($(shell uname -m),x86_64)
+ KBUILD_DEFCONFIG := x86_64_defconfig
+ else
KBUILD_DEFCONFIG := i386_defconfig
+ endif
else
KBUILD_DEFCONFIG := $(ARCH)_defconfig
endif
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index ccce0ed67dd..379814bc41e 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -71,7 +71,7 @@ GCOV_PROFILE := n
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
quiet_cmd_image = BUILD $@
-cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin > $@
+cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/zoffset.h > $@
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
@@ -92,7 +92,7 @@ targets += voffset.h
$(obj)/voffset.h: vmlinux FORCE
$(call if_changed,voffset)
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|startup_64\|efi_pe_entry\|efi_stub_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 18997e5a105..5b7531966b8 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -285,16 +285,26 @@ struct biosregs {
void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
/* cmdline.c */
-int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize);
-int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option);
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize);
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option);
static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
{
- return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize);
+ unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+ if (cmd_line_ptr >= 0x100000)
+ return -1; /* inaccessible */
+
+ return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize);
}
static inline int cmdline_find_option_bool(const char *option)
{
- return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option);
+ unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+ if (cmd_line_ptr >= 0x100000)
+ return -1; /* inaccessible */
+
+ return __cmdline_find_option_bool(cmd_line_ptr, option);
}
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index 6b3b6f708c0..625d21b0cd3 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -27,7 +27,7 @@ static inline int myisspace(u8 c)
* Returns the length of the argument (regardless of if it was
* truncated to fit in the buffer), or -1 on not found.
*/
-int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize)
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize)
{
addr_t cptr;
char c;
@@ -41,8 +41,8 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
st_bufcpy /* Copying this to buffer */
} state = st_wordstart;
- if (!cmdline_ptr || cmdline_ptr >= 0x100000)
- return -1; /* No command line, or inaccessible */
+ if (!cmdline_ptr)
+ return -1; /* No command line */
cptr = cmdline_ptr & 0xf;
set_fs(cmdline_ptr >> 4);
@@ -99,7 +99,7 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
* Returns the position of that option (starts counting with 1)
* or 0 on not found
*/
-int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
{
addr_t cptr;
char c;
@@ -111,8 +111,8 @@ int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
st_wordskip, /* Miscompare, skip */
} state = st_wordstart;
- if (!cmdline_ptr || cmdline_ptr >= 0x100000)
- return -1; /* No command line, or inaccessible */
+ if (!cmdline_ptr)
+ return -1; /* No command line */
cptr = cmdline_ptr & 0xf;
set_fs(cmdline_ptr >> 4);
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index 10f6b1178c6..bffd73b45b1 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -13,13 +13,21 @@ static inline char rdfs8(addr_t addr)
return *((char *)(fs + addr));
}
#include "../cmdline.c"
+static unsigned long get_cmd_line_ptr(void)
+{
+ unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr;
+
+ cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32;
+
+ return cmd_line_ptr;
+}
int cmdline_find_option(const char *option, char *buffer, int bufsize)
{
- return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize);
+ return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize);
}
int cmdline_find_option_bool(const char *option)
{
- return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
+ return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
}
#endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index b1942e22276..f8fa41190c3 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -256,10 +256,10 @@ static efi_status_t setup_efi_pci(struct boot_params *params)
int i;
struct setup_data *data;
- data = (struct setup_data *)params->hdr.setup_data;
+ data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
while (data && data->next)
- data = (struct setup_data *)data->next;
+ data = (struct setup_data *)(unsigned long)data->next;
status = efi_call_phys5(sys_table->boottime->locate_handle,
EFI_LOCATE_BY_PROTOCOL, &pci_proto,
@@ -295,16 +295,18 @@ static efi_status_t setup_efi_pci(struct boot_params *params)
if (!pci)
continue;
+#ifdef CONFIG_X86_64
status = efi_call_phys4(pci->attributes, pci,
EfiPciIoAttributeOperationGet, 0,
&attributes);
-
+#else
+ status = efi_call_phys5(pci->attributes, pci,
+ EfiPciIoAttributeOperationGet, 0, 0,
+ &attributes);
+#endif
if (status != EFI_SUCCESS)
continue;
- if (!attributes & EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM)
- continue;
-
if (!pci->romimage || !pci->romsize)
continue;
@@ -345,9 +347,9 @@ static efi_status_t setup_efi_pci(struct boot_params *params)
memcpy(rom->romdata, pci->romimage, pci->romsize);
if (data)
- data->next = (uint64_t)rom;
+ data->next = (unsigned long)rom;
else
- params->hdr.setup_data = (uint64_t)rom;
+ params->hdr.setup_data = (unsigned long)rom;
data = (struct setup_data *)rom;
@@ -432,10 +434,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
* Once we've found a GOP supporting ConOut,
* don't bother looking any further.
*/
+ first_gop = gop;
if (conout_found)
break;
-
- first_gop = gop;
}
}
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index aa4aaf1b238..1e3184f6072 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -35,11 +35,11 @@ ENTRY(startup_32)
#ifdef CONFIG_EFI_STUB
jmp preferred_addr
- .balign 0x10
/*
* We don't need the return address, so set up the stack so
- * efi_main() can find its arugments.
+ * efi_main() can find its arguments.
*/
+ENTRY(efi_pe_entry)
add $0x4, %esp
call make_boot_params
@@ -50,8 +50,10 @@ ENTRY(startup_32)
pushl %eax
pushl %esi
pushl %ecx
+ sub $0x4, %esp
- .org 0x30,0x90
+ENTRY(efi_stub_entry)
+ add $0x4, %esp
call efi_main
cmpl $0, %eax
movl %eax, %esi
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 2c4b171eec3..c1d383d1fb7 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -37,6 +37,12 @@
__HEAD
.code32
ENTRY(startup_32)
+ /*
+ * 32bit entry is 0 and it is ABI so immutable!
+ * If we come here directly from a bootloader,
+ * kernel(text+data+bss+brk) ramdisk, zero_page, command line
+ * all need to be under the 4G limit.
+ */
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -154,6 +160,12 @@ ENTRY(startup_32)
btsl $_EFER_LME, %eax
wrmsr
+ /* After gdt is loaded */
+ xorl %eax, %eax
+ lldt %ax
+ movl $0x20, %eax
+ ltr %ax
+
/*
* Setup for the jump to 64bit mode
*
@@ -176,37 +188,27 @@ ENTRY(startup_32)
lret
ENDPROC(startup_32)
-no_longmode:
- /* This isn't an x86-64 CPU so hang */
-1:
- hlt
- jmp 1b
-
-#include "../../kernel/verify_cpu.S"
-
- /*
- * Be careful here startup_64 needs to be at a predictable
- * address so I can export it in an ELF header. Bootloaders
- * should look at the ELF header to find this address, as
- * it may change in the future.
- */
.code64
.org 0x200
ENTRY(startup_64)
/*
+ * 64bit entry is 0x200 and it is ABI so immutable!
* We come here either from startup_32 or directly from a
- * 64bit bootloader. If we come here from a bootloader we depend on
- * an identity mapped page table being provied that maps our
- * entire text+data+bss and hopefully all of memory.
+ * 64bit bootloader.
+ * If we come here from a bootloader, kernel(text+data+bss+brk),
+ * ramdisk, zero_page, command line could be above 4G.
+ * We depend on an identity mapped page table being provided
+ * that maps our entire kernel(text+data+bss+brk), zero page
+ * and command line.
*/
#ifdef CONFIG_EFI_STUB
/*
- * The entry point for the PE/COFF executable is 0x210, so only
- * legacy boot loaders will execute this jmp.
+ * The entry point for the PE/COFF executable is efi_pe_entry, so
+ * only legacy boot loaders will execute this jmp.
*/
jmp preferred_addr
- .org 0x210
+ENTRY(efi_pe_entry)
mov %rcx, %rdi
mov %rdx, %rsi
pushq %rdi
@@ -218,7 +220,7 @@ ENTRY(startup_64)
popq %rsi
popq %rdi
- .org 0x230,0x90
+ENTRY(efi_stub_entry)
call efi_main
movq %rax,%rsi
cmpq $0,%rax
@@ -247,9 +249,6 @@ preferred_addr:
movl %eax, %ss
movl %eax, %fs
movl %eax, %gs
- lldt %ax
- movl $0x20, %eax
- ltr %ax
/*
* Compute the decompressed kernel start address. It is where
@@ -349,6 +348,15 @@ relocated:
*/
jmp *%rbp
+ .code32
+no_longmode:
+ /* This isn't an x86-64 CPU so hang */
+1:
+ hlt
+ jmp 1b
+
+#include "../../kernel/verify_cpu.S"
+
.data
gdt:
.word gdt_end - gdt
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 88f7ff6da40..7cb56c6ca35 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -325,6 +325,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
{
real_mode = rmode;
+ sanitize_boot_params(real_mode);
+
if (real_mode->screen_info.orig_video_mode == 7) {
vidmem = (char *) 0xb0000;
vidport = 0x3b4;
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 0e6dc0ee0ee..674019d8e23 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -18,6 +18,7 @@
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
+#include <asm/bootparam_utils.h>
#define BOOT_BOOT_H
#include "../ctype.h"
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 8c132a625b9..9ec06a1f6d6 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -21,6 +21,7 @@
#include <asm/e820.h>
#include <asm/page_types.h>
#include <asm/setup.h>
+#include <asm/bootparam.h>
#include "boot.h"
#include "voffset.h"
#include "zoffset.h"
@@ -255,6 +256,9 @@ section_table:
# header, from the old boot sector.
.section ".header", "a"
+ .globl sentinel
+sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */
+
.globl hdr
hdr:
setup_sects: .byte 0 /* Filled in by build.c */
@@ -279,7 +283,7 @@ _start:
# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
- .word 0x020b # header version number (>= 0x0105)
+ .word 0x020c # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -297,13 +301,7 @@ type_of_loader: .byte 0 # 0 means ancient bootloader, newer
# flags, unused bits must be zero (RFU) bit within loadflags
loadflags:
-LOADED_HIGH = 1 # If set, the kernel is loaded high
-CAN_USE_HEAP = 0x80 # If set, the loader also has set
- # heap_end_ptr to tell how much
- # space behind setup.S can be used for
- # heap purposes.
- # Only the loader knows what is free
- .byte LOADED_HIGH
+ .byte LOADED_HIGH # The kernel is to be loaded high
setup_move_size: .word 0x8000 # size to move, when setup is not
# loaded at 0x90000. We will move setup
@@ -369,7 +367,31 @@ relocatable_kernel: .byte 1
relocatable_kernel: .byte 0
#endif
min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment
-pad3: .word 0
+
+xloadflags:
+#ifdef CONFIG_X86_64
+# define XLF0 XLF_KERNEL_64 /* 64-bit kernel */
+#else
+# define XLF0 0
+#endif
+
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
+ /* kernel/boot_param/ramdisk could be loaded above 4g */
+# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
+#else
+# define XLF1 0
+#endif
+
+#ifdef CONFIG_EFI_STUB
+# ifdef CONFIG_X86_64
+# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */
+# else
+# define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */
+# endif
+#else
+# define XLF23 0
+#endif
+ .word XLF0 | XLF1 | XLF23
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
#added with boot protocol
@@ -397,8 +419,13 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
#define INIT_SIZE VO_INIT_SIZE
#endif
init_size: .long INIT_SIZE # kernel initialization size
-handover_offset: .long 0x30 # offset to the handover
+handover_offset:
+#ifdef CONFIG_EFI_STUB
+ .long 0x30 # offset to the handover
# protocol entry point
+#else
+ .long 0
+#endif
# End of setup header #####################################################
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 03c0683636b..96a6c756353 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -13,7 +13,7 @@ SECTIONS
.bstext : { *(.bstext) }
.bsdata : { *(.bsdata) }
- . = 497;
+ . = 495;
.header : { *(.header) }
.entrytext : { *(.entrytext) }
.inittext : { *(.inittext) }
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 4b8e165ee57..94c54465002 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -52,6 +52,10 @@ int is_big_kernel;
#define PECOFF_RELOC_RESERVE 0x20
+unsigned long efi_stub_entry;
+unsigned long efi_pe_entry;
+unsigned long startup_64;
+
/*----------------------------------------------------------------------*/
static const u32 crctab32[] = {
@@ -132,7 +136,7 @@ static void die(const char * str, ...)
static void usage(void)
{
- die("Usage: build setup system [> image]");
+ die("Usage: build setup system [zoffset.h] [> image]");
}
#ifdef CONFIG_EFI_STUB
@@ -206,30 +210,54 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
*/
put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]);
-#ifdef CONFIG_X86_32
/*
- * Address of entry point.
- *
- * The EFI stub entry point is +16 bytes from the start of
- * the .text section.
+ * Address of entry point for PE/COFF executable
*/
- put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]);
-#else
- /*
- * Address of entry point. startup_32 is at the beginning and
- * the 64-bit entry point (startup_64) is always 512 bytes
- * after. The EFI stub entry point is 16 bytes after that, as
- * the first instruction allows legacy loaders to jump over
- * the EFI stub initialisation
- */
- put_unaligned_le32(text_start + 528, &buf[pe_header + 0x28]);
-#endif /* CONFIG_X86_32 */
+ put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]);
update_pecoff_section_header(".text", text_start, text_sz);
}
#endif /* CONFIG_EFI_STUB */
+
+/*
+ * Parse zoffset.h and find the entry points. We could just #include zoffset.h
+ * but that would mean tools/build would have to be rebuilt every time. It's
+ * not as if parsing it is hard...
+ */
+#define PARSE_ZOFS(p, sym) do { \
+ if (!strncmp(p, "#define ZO_" #sym " ", 11+sizeof(#sym))) \
+ sym = strtoul(p + 11 + sizeof(#sym), NULL, 16); \
+} while (0)
+
+static void parse_zoffset(char *fname)
+{
+ FILE *file;
+ char *p;
+ int c;
+
+ file = fopen(fname, "r");
+ if (!file)
+ die("Unable to open `%s': %m", fname);
+ c = fread(buf, 1, sizeof(buf) - 1, file);
+ if (ferror(file))
+ die("read-error on `zoffset.h'");
+ buf[c] = 0;
+
+ p = (char *)buf;
+
+ while (p && *p) {
+ PARSE_ZOFS(p, efi_stub_entry);
+ PARSE_ZOFS(p, efi_pe_entry);
+ PARSE_ZOFS(p, startup_64);
+
+ p = strchr(p, '\n');
+ while (p && (*p == '\r' || *p == '\n'))
+ p++;
+ }
+}
+
int main(int argc, char ** argv)
{
unsigned int i, sz, setup_sectors;
@@ -241,7 +269,19 @@ int main(int argc, char ** argv)
void *kernel;
u32 crc = 0xffffffffUL;
- if (argc != 3)
+ /* Defaults for old kernel */
+#ifdef CONFIG_X86_32
+ efi_pe_entry = 0x10;
+ efi_stub_entry = 0x30;
+#else
+ efi_pe_entry = 0x210;
+ efi_stub_entry = 0x230;
+ startup_64 = 0x200;
+#endif
+
+ if (argc == 4)
+ parse_zoffset(argv[3]);
+ else if (argc != 3)
usage();
/* Copy the setup code */
@@ -299,6 +339,11 @@ int main(int argc, char ** argv)
#ifdef CONFIG_EFI_STUB
update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz));
+
+#ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */
+ efi_stub_entry -= 0x200;
+#endif
+ put_unaligned_le32(efi_stub_entry, &buf[0x264]);
#endif
crc = partial_crc32(buf, i, crc);
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 5598547281a..94447086e55 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,3 +1,4 @@
+# CONFIG_64BIT is not set
CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 1b9c22bea8a..a0795da22c0 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -40,10 +40,6 @@
#include <linux/workqueue.h>
#include <linux/spinlock.h>
-#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE)
-#define HAS_CTR
-#endif
-
#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
#define HAS_PCBC
#endif
@@ -395,12 +391,6 @@ static int ablk_ctr_init(struct crypto_tfm *tfm)
return ablk_init_common(tfm, "__driver-ctr-aes-aesni");
}
-#ifdef HAS_CTR
-static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
-{
- return ablk_init_common(tfm, "rfc3686(__driver-ctr-aes-aesni)");
-}
-#endif
#endif
#ifdef HAS_PCBC
@@ -1158,33 +1148,6 @@ static struct crypto_alg aesni_algs[] = { {
.maxauthsize = 16,
},
},
-#ifdef HAS_CTR
-}, {
- .cra_name = "rfc3686(ctr(aes))",
- .cra_driver_name = "rfc3686-ctr-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_rfc3686_ctr_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE +
- CTR_RFC3686_NONCE_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE +
- CTR_RFC3686_NONCE_SIZE,
- .ivsize = CTR_RFC3686_IV_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- .geniv = "seqiv",
- },
- },
-#endif
#endif
#ifdef HAS_PCBC
}, {
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a1daf4a6500..cf1a471a18a 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -129,13 +129,6 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
return err;
}
-asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
-{
- sigset_t blocked;
- siginitset(&blocked, mask);
- return sigsuspend(&blocked);
-}
-
/*
* Do a signal return; undo the signal stack.
*/
@@ -215,8 +208,9 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
return err;
}
-asmlinkage long sys32_sigreturn(struct pt_regs *regs)
+asmlinkage long sys32_sigreturn(void)
{
+ struct pt_regs *regs = current_pt_regs();
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
sigset_t set;
unsigned int ax;
@@ -241,8 +235,9 @@ badframe:
return 0;
}
-asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
+asmlinkage long sys32_rt_sigreturn(void)
{
+ struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_ia32 __user *frame;
sigset_t set;
unsigned int ax;
@@ -314,7 +309,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
/*
* Determine which stack to use..
*/
-static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
size_t frame_size,
void __user **fpstate)
{
@@ -324,16 +319,13 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
sp = regs->sp;
/* This is the X/Open sanctioned signal stack switching. */
- if (ka->sa.sa_flags & SA_ONSTACK) {
- if (sas_ss_flags(sp) == 0)
- sp = current->sas_ss_sp + current->sas_ss_size;
- }
-
+ if (ksig->ka.sa.sa_flags & SA_ONSTACK)
+ sp = sigsp(sp, ksig);
/* This is the legacy signal stack switching. */
else if ((regs->ss & 0xffff) != __USER32_DS &&
- !(ka->sa.sa_flags & SA_RESTORER) &&
- ka->sa.sa_restorer)
- sp = (unsigned long) ka->sa.sa_restorer;
+ !(ksig->ka.sa.sa_flags & SA_RESTORER) &&
+ ksig->ka.sa.sa_restorer)
+ sp = (unsigned long) ksig->ka.sa.sa_restorer;
if (used_math()) {
unsigned long fx_aligned, math_size;
@@ -352,7 +344,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
return (void __user *) sp;
}
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
+int ia32_setup_frame(int sig, struct ksignal *ksig,
compat_sigset_t *set, struct pt_regs *regs)
{
struct sigframe_ia32 __user *frame;
@@ -371,7 +363,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
0x80cd, /* int $0x80 */
};
- frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+ frame = get_sigframe(ksig, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
@@ -388,8 +380,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
return -EFAULT;
}
- if (ka->sa.sa_flags & SA_RESTORER) {
- restorer = ka->sa.sa_restorer;
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ restorer = ksig->ka.sa.sa_restorer;
} else {
/* Return stub is in 32bit vsyscall page */
if (current->mm->context.vdso)
@@ -414,7 +406,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
- regs->ip = (unsigned long) ka->sa.sa_handler;
+ regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
/* Make -mregparm=3 work */
regs->ax = sig;
@@ -430,7 +422,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
return 0;
}
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
compat_sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe_ia32 __user *frame;
@@ -451,7 +443,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
0,
};
- frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+ frame = get_sigframe(ksig, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
@@ -469,8 +461,8 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
put_user_ex(0, &frame->uc.uc_link);
err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
- if (ka->sa.sa_flags & SA_RESTORER)
- restorer = ka->sa.sa_restorer;
+ if (ksig->ka.sa.sa_flags & SA_RESTORER)
+ restorer = ksig->ka.sa.sa_restorer;
else
restorer = VDSO32_SYMBOL(current->mm->context.vdso,
rt_sigreturn);
@@ -483,7 +475,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
} put_user_catch(err);
- err |= copy_siginfo_to_user32(&frame->info, info);
+ err |= copy_siginfo_to_user32(&frame->info, &ksig->info);
err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -493,7 +485,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
- regs->ip = (unsigned long) ka->sa.sa_handler;
+ regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
/* Make -mregparm=3 work */
regs->ax = sig;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 102ff7cb3e4..474dc1b59f7 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -207,7 +207,7 @@ sysexit_from_sys_call:
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz ia32_ret_from_sys_call
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
movl %eax,%esi /* second arg, syscall return value */
cmpl $-MAX_ERRNO,%eax /* is it an error ? */
jbe 1f
@@ -217,7 +217,7 @@ sysexit_from_sys_call:
call __audit_syscall_exit
movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz \exit
@@ -456,18 +456,16 @@ ia32_badsys:
ALIGN
GLOBAL(\label)
leaq \func(%rip),%rax
- leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
jmp ia32_ptregs_common
.endm
CFI_STARTPROC32
- PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
- PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
- PTREGSCALL stub32_execve, compat_sys_execve, %rcx
- PTREGSCALL stub32_fork, sys_fork, %rdi
- PTREGSCALL stub32_vfork, sys_vfork, %rdi
- PTREGSCALL stub32_iopl, sys_iopl, %rsi
+ PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
+ PTREGSCALL stub32_sigreturn, sys32_sigreturn
+ PTREGSCALL stub32_execve, compat_sys_execve
+ PTREGSCALL stub32_fork, sys_fork
+ PTREGSCALL stub32_vfork, sys_vfork
ALIGN
GLOBAL(stub32_clone)
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index d0b689ba7be..592f5a9a9c0 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -172,183 +172,12 @@ asmlinkage long sys32_mprotect(unsigned long start, size_t len,
return sys_mprotect(start, len, prot);
}
-asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
- struct sigaction32 __user *oact,
- unsigned int sigsetsize)
-{
- struct k_sigaction new_ka, old_ka;
- int ret;
- compat_sigset_t set32;
-
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
-
- if (act) {
- compat_uptr_t handler, restorer;
-
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(handler, &act->sa_handler) ||
- __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
- __get_user(restorer, &act->sa_restorer) ||
- __copy_from_user(&set32, &act->sa_mask,
- sizeof(compat_sigset_t)))
- return -EFAULT;
- new_ka.sa.sa_handler = compat_ptr(handler);
- new_ka.sa.sa_restorer = compat_ptr(restorer);
-
- /*
- * FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
- * than _NSIG_WORDS << 1
- */
- switch (_NSIG_WORDS) {
- case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
- | (((long)set32.sig[7]) << 32);
- case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
- | (((long)set32.sig[5]) << 32);
- case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
- | (((long)set32.sig[3]) << 32);
- case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
- | (((long)set32.sig[1]) << 32);
- }
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- /*
- * FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
- * than _NSIG_WORDS << 1
- */
- switch (_NSIG_WORDS) {
- case 4:
- set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
- set32.sig[6] = old_ka.sa.sa_mask.sig[3];
- case 3:
- set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
- set32.sig[4] = old_ka.sa.sa_mask.sig[2];
- case 2:
- set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
- set32.sig[2] = old_ka.sa.sa_mask.sig[1];
- case 1:
- set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
- set32.sig[0] = old_ka.sa.sa_mask.sig[0];
- }
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_handler),
- &oact->sa_handler) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
- &oact->sa_restorer) ||
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
- __copy_to_user(&oact->sa_mask, &set32,
- sizeof(compat_sigset_t)))
- return -EFAULT;
- }
-
- return ret;
-}
-
-asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
- struct old_sigaction32 __user *oact)
-{
- struct k_sigaction new_ka, old_ka;
- int ret;
-
- if (act) {
- compat_old_sigset_t mask;
- compat_uptr_t handler, restorer;
-
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(handler, &act->sa_handler) ||
- __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
- __get_user(restorer, &act->sa_restorer) ||
- __get_user(mask, &act->sa_mask))
- return -EFAULT;
-
- new_ka.sa.sa_handler = compat_ptr(handler);
- new_ka.sa.sa_restorer = compat_ptr(restorer);
-
- siginitset(&new_ka.sa.sa_mask, mask);
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_handler),
- &oact->sa_handler) ||
- __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
- &oact->sa_restorer) ||
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
- return -EFAULT;
- }
-
- return ret;
-}
-
asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
int options)
{
return compat_sys_wait4(pid, stat_addr, options, NULL);
}
-/* 32-bit timeval and related flotsam. */
-
-asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
- struct compat_timespec __user *interval)
-{
- struct timespec t;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
- set_fs(old_fs);
- if (put_compat_timespec(&t, interval))
- return -EFAULT;
- return ret;
-}
-
-asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set,
- compat_size_t sigsetsize)
-{
- sigset_t s;
- compat_sigset_t s32;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize);
- set_fs(old_fs);
- if (!ret) {
- switch (_NSIG_WORDS) {
- case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
- case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
- case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
- case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
- }
- if (copy_to_user(set, &s32, sizeof(compat_sigset_t)))
- return -EFAULT;
- }
- return ret;
-}
-
-asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
- compat_siginfo_t __user *uinfo)
-{
- siginfo_t info;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- if (copy_siginfo_from_user32(&info, uinfo))
- return -EFAULT;
- set_fs(KERNEL_DS);
- ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info);
- set_fs(old_fs);
- return ret;
-}
-
/* warning: next two assume little endian */
asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
u32 poslo, u32 poshi)
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 0c44630d178..b31bf97775f 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -49,10 +49,6 @@
/* Asm macros */
-#define ACPI_ASM_MACROS
-#define BREAKPOINT3
-#define ACPI_DISABLE_IRQS() local_irq_disable()
-#define ACPI_ENABLE_IRQS() local_irq_enable()
#define ACPI_FLUSH_CPU_CACHE() wbinvd()
int __acpi_acquire_global_lock(unsigned int *lock);
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9cd8f..a54ee1d054d 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,6 +81,23 @@ static inline struct amd_northbridge *node_to_amd_nb(int node)
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
}
+static inline u16 amd_get_node_id(struct pci_dev *pdev)
+{
+ struct pci_dev *misc;
+ int i;
+
+ for (i = 0; i != amd_nb_num(); i++) {
+ misc = node_to_amd_nb(i)->misc;
+
+ if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) &&
+ PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn))
+ return i;
+ }
+
+ WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev));
+ return 0;
+}
+
#else
#define amd_nb_num(x) 0
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
new file mode 100644
index 00000000000..5b5e9cb774b
--- /dev/null
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -0,0 +1,38 @@
+#ifndef _ASM_X86_BOOTPARAM_UTILS_H
+#define _ASM_X86_BOOTPARAM_UTILS_H
+
+#include <asm/bootparam.h>
+
+/*
+ * This file is included from multiple environments. Do not
+ * add completing #includes to make it standalone.
+ */
+
+/*
+ * Deal with bootloaders which fail to initialize unknown fields in
+ * boot_params to zero. The list fields in this list are taken from
+ * analysis of kexec-tools; if other broken bootloaders initialize a
+ * different set of fields we will need to figure out how to disambiguate.
+ *
+ */
+static void sanitize_boot_params(struct boot_params *boot_params)
+{
+ if (boot_params->sentinel) {
+ /*fields in boot_params are not valid, clear them */
+ memset(&boot_params->olpc_ofw_header, 0,
+ (char *)&boot_params->alt_mem_k -
+ (char *)&boot_params->olpc_ofw_header);
+ memset(&boot_params->kbd_status, 0,
+ (char *)&boot_params->hdr -
+ (char *)&boot_params->kbd_status);
+ memset(&boot_params->_pad7[0], 0,
+ (char *)&boot_params->edd_mbr_sig_buffer[0] -
+ (char *)&boot_params->_pad7[0]);
+ memset(&boot_params->_pad8[0], 0,
+ (char *)&boot_params->eddbuf[0] -
+ (char *)&boot_params->_pad8[0]);
+ memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
+ }
+}
+
+#endif /* _ASM_X86_BOOTPARAM_UTILS_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 2d9075e863a..93fe929d1ce 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -167,6 +167,7 @@
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
+#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 6e8fdf5ad11..28677c55113 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -94,6 +94,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
#endif /* CONFIG_X86_32 */
extern int add_efi_memmap;
+extern unsigned long x86_efi_facility;
extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int efi_memblock_x86_reserve_range(void);
extern void efi_call_phys_prelog(void);
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 41ab26ea656..e25cc33ec54 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -26,9 +26,10 @@
#ifdef CONFIG_X86_64
# include <asm/sigcontext32.h>
# include <asm/user32.h>
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+struct ksignal;
+int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
compat_sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
+int ia32_setup_frame(int sig, struct ksignal *ksig,
compat_sigset_t *set, struct pt_regs *regs);
#else
# define user_i387_ia32_struct user_i387_struct
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9a25b522d37..86cb51e1ca9 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -44,7 +44,6 @@
#ifdef CONFIG_DYNAMIC_FTRACE
#define ARCH_SUPPORTS_FTRACE_OPS 1
-#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
#endif
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 434e2106cc8..b18df579c0e 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -80,9 +80,9 @@ extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
#ifdef CONFIG_PCI_MSI
-extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id);
+extern int default_setup_hpet_msi(unsigned int irq, unsigned int id);
#else
-static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
+static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id)
{
return -EINVAL;
}
@@ -111,6 +111,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
static inline int hpet_enable(void) { return 0; }
static inline int is_hpet_enabled(void) { return 0; }
#define hpet_readl(a) 0
+#define default_setup_hpet_msi NULL
#endif
#endif /* _ASM_X86_HPET_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index eb92a6ed2be..10a78c3d3d5 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -101,6 +101,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
irq_attr->polarity = polarity;
}
+/* Intel specific interrupt remapping information */
struct irq_2_iommu {
struct intel_iommu *iommu;
u16 irte_index;
@@ -108,6 +109,12 @@ struct irq_2_iommu {
u8 irte_mask;
};
+/* AMD specific interrupt remapping information */
+struct irq_2_irte {
+ u16 devid; /* Device ID for IRTE table */
+ u16 index; /* Index into IRTE table*/
+};
+
/*
* This is performance-critical, we want to do it O(1)
*
@@ -120,7 +127,11 @@ struct irq_cfg {
u8 vector;
u8 move_in_progress : 1;
#ifdef CONFIG_IRQ_REMAP
- struct irq_2_iommu irq_2_iommu;
+ u8 remapped : 1;
+ union {
+ struct irq_2_iommu irq_2_iommu;
+ struct irq_2_irte irq_2_irte;
+ };
#endif
};
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index b518c750993..86095ed1413 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -25,6 +25,7 @@
extern void init_hypervisor(struct cpuinfo_x86 *c);
extern void init_hypervisor_platform(void);
+extern bool hypervisor_x2apic_available(void);
/*
* x86 hypervisor information
@@ -41,6 +42,9 @@ struct hypervisor_x86 {
/* Platform setup (run once per boot) */
void (*init_platform)(void);
+
+ /* X2APIC detection (run once per boot) */
+ bool (*x2apic_available)(void);
};
extern const struct hypervisor_x86 *x86_hyper;
@@ -51,13 +55,4 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
extern const struct hypervisor_x86 x86_hyper_kvm;
-static inline bool hypervisor_x2apic_available(void)
-{
- if (kvm_para_available())
- return true;
- if (xen_x2apic_para_available())
- return true;
- return false;
-}
-
#endif
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 4c6da2e4bb1..d0e8e014104 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -13,21 +13,6 @@
#include <asm/sigcontext32.h>
/* signal.h */
-struct sigaction32 {
- unsigned int sa_handler; /* Really a pointer, but need to deal
- with 32 bits */
- unsigned int sa_flags;
- unsigned int sa_restorer; /* Another 32 bit pointer */
- compat_sigset_t sa_mask; /* A 32 bit mask */
-};
-
-struct old_sigaction32 {
- unsigned int sa_handler; /* Really a pointer, but need to deal
- with 32 bits */
- compat_old_sigset_t sa_mask; /* A 32 bit mask */
- unsigned int sa_flags;
- unsigned int sa_restorer; /* Another 32 bit pointer */
-};
struct ucontext_ia32 {
unsigned int uc_flags;
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index adcc0ae73d0..223042086f4 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -1,20 +1,14 @@
-#ifndef _ASM_X86_INIT_32_H
-#define _ASM_X86_INIT_32_H
+#ifndef _ASM_X86_INIT_H
+#define _ASM_X86_INIT_H
-#ifdef CONFIG_X86_32
-extern void __init early_ioremap_page_table_range_init(void);
-#endif
+struct x86_mapping_info {
+ void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
+ void *context; /* context for alloc_pgt_page */
+ unsigned long pmd_flag; /* page flag for PMD entry */
+ bool kernel_mapping; /* kernel mapping or ident mapping */
+};
-extern void __init zone_sizes_init(void);
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+ unsigned long addr, unsigned long end);
-extern unsigned long __init
-kernel_physical_mapping_init(unsigned long start,
- unsigned long end,
- unsigned long page_size_mask);
-
-
-extern unsigned long __initdata pgt_buf_start;
-extern unsigned long __meminitdata pgt_buf_end;
-extern unsigned long __meminitdata pgt_buf_top;
-
-#endif /* _ASM_X86_INIT_32_H */
+#endif /* _ASM_X86_INIT_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 73d8c5398ea..459e50a424d 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -144,11 +144,24 @@ extern int timer_through_8259;
(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
struct io_apic_irq_attr;
+struct irq_cfg;
extern int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr);
void setup_IO_APIC_irq_extra(u32 gsi);
extern void ioapic_insert_resources(void);
+extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
+ unsigned int, int,
+ struct io_apic_irq_attr *);
+extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
+ unsigned int, int,
+ struct io_apic_irq_attr *);
+extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
+
+extern void native_compose_msi_msg(struct pci_dev *pdev,
+ unsigned int irq, unsigned int dest,
+ struct msi_msg *msg, u8 hpet_id);
+extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
extern int save_ioapic_entries(void);
@@ -179,6 +192,12 @@ extern void __init native_io_apic_init_mappings(void);
extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
+extern void native_disable_io_apic(void);
+extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
+extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
+extern int native_ioapic_set_affinity(struct irq_data *,
+ const struct cpumask *,
+ bool);
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
@@ -193,6 +212,9 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
{
x86_io_apic_ops.modify(apic, reg, value);
}
+
+extern void io_apic_eoi(unsigned int apic, unsigned int vector);
+
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
@@ -223,6 +245,12 @@ static inline void disable_ioapic_support(void) { }
#define native_io_apic_read NULL
#define native_io_apic_write NULL
#define native_io_apic_modify NULL
+#define native_disable_io_apic NULL
+#define native_io_apic_print_entries NULL
+#define native_ioapic_set_affinity NULL
+#define native_setup_ioapic_entry NULL
+#define native_compose_msi_msg NULL
+#define native_eoi_ioapic_pin NULL
#endif
#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5fb9bbbd2f1..95fd3527f63 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -26,8 +26,6 @@
#ifdef CONFIG_IRQ_REMAP
-extern int irq_remapping_enabled;
-
extern void setup_irq_remapping_ops(void);
extern int irq_remapping_supported(void);
extern int irq_remapping_prepare(void);
@@ -40,21 +38,19 @@ extern int setup_ioapic_remapped_entry(int irq,
unsigned int destination,
int vector,
struct io_apic_irq_attr *attr);
-extern int set_remapped_irq_affinity(struct irq_data *data,
- const struct cpumask *mask,
- bool force);
extern void free_remapped_irq(int irq);
extern void compose_remapped_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id);
-extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
-extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int sub_handle);
extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
+extern void panic_if_irq_remap(const char *msg);
+extern bool setup_remapped_irq(int irq,
+ struct irq_cfg *cfg,
+ struct irq_chip *chip);
-#else /* CONFIG_IRQ_REMAP */
+void irq_remap_modify_chip_defaults(struct irq_chip *chip);
-#define irq_remapping_enabled 0
+#else /* CONFIG_IRQ_REMAP */
static inline void setup_irq_remapping_ops(void) { }
static inline int irq_remapping_supported(void) { return 0; }
@@ -71,30 +67,30 @@ static inline int setup_ioapic_remapped_entry(int irq,
{
return -ENODEV;
}
-static inline int set_remapped_irq_affinity(struct irq_data *data,
- const struct cpumask *mask,
- bool force)
-{
- return 0;
-}
static inline void free_remapped_irq(int irq) { }
static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id)
{
}
-static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
+static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
{
return -ENODEV;
}
-static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int sub_handle)
+
+static inline void panic_if_irq_remap(const char *msg)
+{
+}
+
+static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
{
- return -ENODEV;
}
-static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
+
+static inline bool setup_remapped_irq(int irq,
+ struct irq_cfg *cfg,
+ struct irq_chip *chip)
{
- return -ENODEV;
+ return false;
}
#endif /* CONFIG_IRQ_REMAP */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 1508e518c7e..aac5fa62a86 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -109,8 +109,8 @@
#define UV_BAU_MESSAGE 0xf5
-/* Xen vector callback to receive events in a HVM domain */
-#define XEN_HVM_EVTCHN_CALLBACK 0xf3
+/* Vector on which hypervisor callbacks will be delivered */
+#define HYPERVISOR_CALLBACK_VECTOR 0xf3
/*
* Local APIC timer IRQ vector is on a different priority level,
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 6080d2694ba..17483a492f1 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -48,11 +48,11 @@
# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
#else
/* Maximum physical address we can use pages from */
-# define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL)
+# define KEXEC_SOURCE_MEMORY_LIMIT (MAXMEM-1)
/* Maximum address we can reach in physical address mode */
-# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL)
+# define KEXEC_DESTINATION_MEMORY_LIMIT (MAXMEM-1)
/* Maximum address we can use for the control pages */
-# define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL)
+# define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1)
/* Allocate one page for the pdp and the second for the code */
# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dc87b65e9c3..635a74d2240 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -33,10 +33,10 @@
#define KVM_MAX_VCPUS 254
#define KVM_SOFT_MAX_VCPUS 160
-#define KVM_MEMORY_SLOTS 32
-/* memory slots that does not exposed to userspace */
-#define KVM_PRIVATE_MEM_SLOTS 4
-#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
+#define KVM_USER_MEM_SLOTS 125
+/* memory slots that are not exposed to userspace */
+#define KVM_PRIVATE_MEM_SLOTS 3
+#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
#define KVM_MMIO_SIZE 16
@@ -219,11 +219,6 @@ struct kvm_mmu_page {
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
- /*
- * One bit set per slot which has memory
- * in this shadow page.
- */
- DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM);
bool unsync;
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
@@ -502,6 +497,13 @@ struct kvm_vcpu_arch {
u64 msr_val;
struct gfn_to_hva_cache data;
} pv_eoi;
+
+ /*
+ * Indicate whether the access faults on its page table in guest
+ * which is set when fix page fault and used to detect unhandeable
+ * instruction.
+ */
+ bool write_fault_to_shadow_pgtable;
};
struct kvm_lpage_info {
@@ -697,6 +699,11 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+ int (*vm_has_apicv)(struct kvm *kvm);
+ void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
+ void (*hwapic_isr_update)(struct kvm *kvm, int isr);
+ void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
+ void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -991,6 +998,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
+int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed1f16187b..695399f2d5e 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -27,7 +27,7 @@ static inline bool kvm_check_and_clear_guest_paused(void)
*
* Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
* The hypercall number should be placed in rax and the return value will be
- * placed in rax. No other registers will be clobbered unless explicited
+ * placed in rax. No other registers will be clobbered unless explicitly
* noted by the particular hypercall.
*/
@@ -85,13 +85,13 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
return ret;
}
-static inline int kvm_para_available(void)
+static inline bool kvm_para_available(void)
{
unsigned int eax, ebx, ecx, edx;
char signature[13];
if (boot_cpu_data.cpuid_level < 0)
- return 0; /* So we don't blow up on old processors */
+ return false; /* So we don't blow up on old processors */
if (cpu_has_hypervisor) {
cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
@@ -101,10 +101,10 @@ static inline int kvm_para_available(void)
signature[12] = 0;
if (strcmp(signature, "KVMKVMKVM") == 0)
- return 1;
+ return true;
}
- return 0;
+ return false;
}
static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 48142971b25..79327e9483a 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -27,20 +27,20 @@
#define __asmlinkage_protect0(ret) \
__asmlinkage_protect_n(ret)
#define __asmlinkage_protect1(ret, arg1) \
- __asmlinkage_protect_n(ret, "g" (arg1))
+ __asmlinkage_protect_n(ret, "m" (arg1))
#define __asmlinkage_protect2(ret, arg1, arg2) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
- "g" (arg4))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+ "m" (arg4))
#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
- "g" (arg4), "g" (arg5))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+ "m" (arg4), "m" (arg5))
#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
- "g" (arg4), "g" (arg5), "g" (arg6))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+ "m" (arg4), "m" (arg5), "m" (arg6))
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ecdfee60ee4..f4076af1f4e 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -3,6 +3,90 @@
#include <uapi/asm/mce.h>
+/*
+ * Machine Check support for x86
+ */
+
+/* MCG_CAP register defines */
+#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
+#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
+#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
+#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
+#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
+#define MCG_EXT_CNT_SHIFT 16
+#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
+#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
+
+/* MCG_STATUS register defines */
+#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
+#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
+#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
+
+/* MCi_STATUS register defines */
+#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
+#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
+#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
+#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
+#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
+#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
+#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
+#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+#define MCACOD 0xffff /* MCA Error Code */
+
+/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
+#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
+#define MCACOD_SCRUBMSK 0xfff0
+#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
+#define MCACOD_DATA 0x0134 /* Data Load */
+#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
+
+/* MCi_MISC register defines */
+#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
+#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
+#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
+#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
+#define MCI_MISC_ADDR_PHYS 2 /* physical address */
+#define MCI_MISC_ADDR_MEM 3 /* memory address */
+#define MCI_MISC_ADDR_GENERIC 7 /* generic */
+
+/* CTL2 register defines */
+#define MCI_CTL2_CMCI_EN (1ULL << 30)
+#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
+
+#define MCJ_CTX_MASK 3
+#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
+#define MCJ_CTX_RANDOM 0 /* inject context: random */
+#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
+#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
+#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
+#define MCJ_EXCEPTION 0x8 /* raise as exception */
+#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
+
+#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
+
+/* Software defined banks */
+#define MCE_EXTENDED_BANK 128
+#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0)
+#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
+
+#define MCE_LOG_LEN 32
+#define MCE_LOG_SIGNATURE "MACHINECHECK"
+
+/*
+ * This structure contains all data related to the MCE log. Also
+ * carries a signature to make it easier to find from external
+ * debugging tools. Each entry is only valid when its finished flag
+ * is set.
+ */
+struct mce_log {
+ char signature[12]; /* "MACHINECHECK" */
+ unsigned len; /* = MCE_LOG_LEN */
+ unsigned next;
+ unsigned flags;
+ unsigned recordlen; /* length of struct mce */
+ struct mce entry[MCE_LOG_LEN];
+};
struct mca_config {
bool dont_log_ce;
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 43d921b4752..6825e2efd1b 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -57,4 +57,18 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
static inline void __exit exit_amd_microcode(void) {}
#endif
+#ifdef CONFIG_MICROCODE_EARLY
+#define MAX_UCODE_COUNT 128
+extern void __init load_ucode_bsp(void);
+extern __init void load_ucode_ap(void);
+extern int __init save_microcode_in_initrd(void);
+#else
+static inline void __init load_ucode_bsp(void) {}
+static inline __init void load_ucode_ap(void) {}
+static inline int __init save_microcode_in_initrd(void)
+{
+ return 0;
+}
+#endif
+
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
new file mode 100644
index 00000000000..5356f927d41
--- /dev/null
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -0,0 +1,85 @@
+#ifndef _ASM_X86_MICROCODE_INTEL_H
+#define _ASM_X86_MICROCODE_INTEL_H
+
+#include <asm/microcode.h>
+
+struct microcode_header_intel {
+ unsigned int hdrver;
+ unsigned int rev;
+ unsigned int date;
+ unsigned int sig;
+ unsigned int cksum;
+ unsigned int ldrver;
+ unsigned int pf;
+ unsigned int datasize;
+ unsigned int totalsize;
+ unsigned int reserved[3];
+};
+
+struct microcode_intel {
+ struct microcode_header_intel hdr;
+ unsigned int bits[0];
+};
+
+/* microcode format is extended from prescott processors */
+struct extended_signature {
+ unsigned int sig;
+ unsigned int pf;
+ unsigned int cksum;
+};
+
+struct extended_sigtable {
+ unsigned int count;
+ unsigned int cksum;
+ unsigned int reserved[3];
+ struct extended_signature sigs[0];
+};
+
+#define DEFAULT_UCODE_DATASIZE (2000)
+#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
+#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
+#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
+#define DWSIZE (sizeof(u32))
+
+#define get_totalsize(mc) \
+ (((struct microcode_intel *)mc)->hdr.totalsize ? \
+ ((struct microcode_intel *)mc)->hdr.totalsize : \
+ DEFAULT_UCODE_TOTALSIZE)
+
+#define get_datasize(mc) \
+ (((struct microcode_intel *)mc)->hdr.datasize ? \
+ ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
+
+#define sigmatch(s1, s2, p1, p2) \
+ (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
+
+#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
+
+extern int
+get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev);
+extern int microcode_sanity_check(void *mc, int print_err);
+extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev);
+extern int
+update_match_revision(struct microcode_header_intel *mc_header, int rev);
+
+#ifdef CONFIG_MICROCODE_INTEL_EARLY
+extern void __init load_ucode_intel_bsp(void);
+extern void __cpuinit load_ucode_intel_ap(void);
+extern void show_ucode_info_early(void);
+#else
+static inline __init void load_ucode_intel_bsp(void) {}
+static inline __cpuinit void load_ucode_intel_ap(void) {}
+static inline void show_ucode_info_early(void) {}
+#endif
+
+#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
+extern int save_mc_for_early(u8 *mc);
+#else
+static inline int save_mc_for_early(u8 *mc)
+{
+ return 0;
+}
+#endif
+
+#endif /* _ASM_X86_MICROCODE_INTEL_H */
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index eb05fb3b02f..8a9b3e288cb 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -14,12 +14,6 @@ extern struct pglist_data *node_data[];
#include <asm/numaq.h>
-extern void resume_map_numa_kva(pgd_t *pgd);
-
-#else /* !CONFIG_NUMA */
-
-static inline void resume_map_numa_kva(pgd_t *pgd) {}
-
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 79ce5685ab6..c2934be2446 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -11,4 +11,8 @@ struct ms_hyperv_info {
extern struct ms_hyperv_info ms_hyperv;
+void hyperv_callback_vector(void);
+void hyperv_vector_handler(struct pt_regs *regs);
+void hv_register_vmbus_handler(int irq, irq_handler_t handler);
+
#endif
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index bcdff997668..2f366d0ac6b 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -4,7 +4,8 @@
#define MWAIT_SUBSTATE_MASK 0xf
#define MWAIT_CSTATE_MASK 0xf
#define MWAIT_SUBSTATE_SIZE 4
-#define MWAIT_MAX_NUM_CSTATES 8
+#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
+#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)
#define CPUID_MWAIT_LEAF 5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 49119fcea2d..1b99ee5c9f0 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -54,13 +54,11 @@ static inline int numa_cpu_node(int cpu)
#ifdef CONFIG_X86_32
# include <asm/numa_32.h>
-#else
-# include <asm/numa_64.h>
#endif
#ifdef CONFIG_NUMA
-extern void __cpuinit numa_set_node(int cpu, int node);
-extern void __cpuinit numa_clear_node(int cpu);
+extern void numa_set_node(int cpu, int node);
+extern void numa_clear_node(int cpu);
extern void __init init_cpu_to_node(void);
extern void __cpuinit numa_add_cpu(int cpu);
extern void __cpuinit numa_remove_cpu(int cpu);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
deleted file mode 100644
index 0c05f7ae46e..00000000000
--- a/arch/x86/include/asm/numa_64.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_X86_NUMA_64_H
-#define _ASM_X86_NUMA_64_H
-
-extern unsigned long numa_free_all_bootmem(void);
-
-#endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 8ca82839288..c87892442e5 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -17,6 +17,10 @@
struct page;
+#include <linux/range.h>
+extern struct range pfn_mapped[];
+extern int nr_pfn_mapped;
+
static inline void clear_user_page(void *page, unsigned long vaddr,
struct page *pg)
{
@@ -44,7 +48,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
* case properly. Once all supported versions of gcc understand it, we can
* remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated)
*/
-#define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x)))
+#define __pa_symbol(x) \
+ __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index da4e762406f..4d550d04b60 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -15,6 +15,7 @@ extern unsigned long __phys_addr(unsigned long);
#else
#define __phys_addr(x) __phys_addr_nodebug(x)
#endif
+#define __phys_addr_symbol(x) __phys_addr(x)
#define __phys_reloc_hide(x) RELOC_HIDE((x), 0)
#ifdef CONFIG_FLATMEM
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 072694ed81a..0f1ddee6a0c 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -3,4 +3,40 @@
#include <asm/page_64_types.h>
+#ifndef __ASSEMBLY__
+
+/* duplicated to the one in bootmem.h */
+extern unsigned long max_pfn;
+extern unsigned long phys_base;
+
+static inline unsigned long __phys_addr_nodebug(unsigned long x)
+{
+ unsigned long y = x - __START_KERNEL_map;
+
+ /* use the carry flag to determine if x was < __START_KERNEL_map */
+ x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));
+
+ return x;
+}
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+extern unsigned long __phys_addr(unsigned long);
+extern unsigned long __phys_addr_symbol(unsigned long);
+#else
+#define __phys_addr(x) __phys_addr_nodebug(x)
+#define __phys_addr_symbol(x) \
+ ((unsigned long)(x) - __START_KERNEL_map + phys_base)
+#endif
+
+#define __phys_reloc_hide(x) (x)
+
+#ifdef CONFIG_FLATMEM
+#define pfn_valid(pfn) ((pfn) < max_pfn)
+#endif
+
+void clear_page(void *page);
+void copy_page(void *to, void *from);
+
+#endif /* !__ASSEMBLY__ */
+
#endif /* _ASM_X86_PAGE_64_H */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 320f7bb95f7..8b491e66eaa 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -50,26 +50,4 @@
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL)
-#ifndef __ASSEMBLY__
-void clear_page(void *page);
-void copy_page(void *to, void *from);
-
-/* duplicated to the one in bootmem.h */
-extern unsigned long max_pfn;
-extern unsigned long phys_base;
-
-extern unsigned long __phys_addr(unsigned long);
-#define __phys_reloc_hide(x) (x)
-
-#define vmemmap ((struct page *)VMEMMAP_START)
-
-extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
-extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
-
-#endif /* !__ASSEMBLY__ */
-
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn) ((pfn) < max_pfn)
-#endif
-
#endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index e21fdd10479..54c97879195 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,6 +51,8 @@ static inline phys_addr_t get_max_mapped(void)
return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
}
+bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
+
extern unsigned long init_memory_mapping(unsigned long start,
unsigned long end);
diff --git a/arch/x86/include/asm/parport.h b/arch/x86/include/asm/parport.h
index 3c4ffeb467e..0d2d3b29118 100644
--- a/arch/x86/include/asm/parport.h
+++ b/arch/x86/include/asm/parport.h
@@ -1,8 +1,8 @@
#ifndef _ASM_X86_PARPORT_H
#define _ASM_X86_PARPORT_H
-static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma);
-static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma)
+static int parport_pc_find_isa_ports(int autoirq, int autodma);
+static int parport_pc_find_nonpci_ports(int autoirq, int autodma)
{
return parport_pc_find_isa_ports(autoirq, autodma);
}
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index dba7805176b..c28fd02f4bf 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -121,9 +121,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
#define arch_teardown_msi_irq x86_teardown_msi_irq
#define arch_restore_msi_irqs x86_restore_msi_irqs
/* implemented in arch/x86/kernel/apic/io_apic. */
+struct msi_desc;
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
void native_teardown_msi_irq(unsigned int irq);
void native_restore_msi_irqs(struct pci_dev *dev, int irq);
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ unsigned int irq_base, unsigned int irq_offset);
/* default to the implementation in drivers/lib/msi.c */
#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
#define HAVE_DEFAULT_MSI_RESTORE_IRQS
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 73e8eeff22e..747e5a38b59 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -140,11 +140,10 @@ struct pci_mmcfg_region {
extern int __init pci_mmcfg_arch_init(void);
extern void __init pci_mmcfg_arch_free(void);
-extern int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
+extern int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
extern void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg);
-extern int __devinit pci_mmconfig_insert(struct device *dev,
- u16 seg, u8 start,
- u8 end, phys_addr_t addr);
+extern int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
+ phys_addr_t addr);
extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4fabcdf1cfa..57cb6340221 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,8 +29,13 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
-#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40)
-#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41)
+#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
+#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
+#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
+
+#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37
+#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
+ (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
#define AMD64_EVENTSEL_EVENT \
(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
@@ -46,8 +51,12 @@
#define AMD64_RAW_EVENT_MASK \
(X86_RAW_EVENT_MASK | \
AMD64_EVENTSEL_EVENT)
+#define AMD64_RAW_EVENT_MASK_NB \
+ (AMD64_EVENTSEL_EVENT | \
+ ARCH_PERFMON_EVENTSEL_UMASK)
#define AMD64_NUM_COUNTERS 4
#define AMD64_NUM_COUNTERS_CORE 6
+#define AMD64_NUM_COUNTERS_NB 4
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5199db2923d..1e672234c4f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
}
+static inline unsigned long pud_pfn(pud_t pud)
+{
+ return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
static inline int pmd_large(pmd_t pte)
@@ -390,6 +395,7 @@ pte_t *populate_extra_pte(unsigned long vaddr);
#ifndef __ASSEMBLY__
#include <linux/mm_types.h>
+#include <linux/log2.h>
static inline int pte_none(pte_t pte)
{
@@ -615,6 +621,8 @@ static inline int pgd_none(pgd_t pgd)
#ifndef __ASSEMBLY__
extern int direct_gbpages;
+void init_mem_mapping(void);
+void early_alloc_pgt_buf(void);
/* local pte updates need not use xchg for locking */
static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
@@ -781,6 +789,32 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
memcpy(dst, src, count * sizeof(pgd_t));
}
+#define PTE_SHIFT ilog2(PTRS_PER_PTE)
+static inline int page_level_shift(enum pg_level level)
+{
+ return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT;
+}
+static inline unsigned long page_level_size(enum pg_level level)
+{
+ return 1UL << page_level_shift(level);
+}
+static inline unsigned long page_level_mask(enum pg_level level)
+{
+ return ~(page_level_size(level) - 1);
+}
+
+/*
+ * The x86 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+}
+static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd)
+{
+}
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 8faa215a503..9ee322103c6 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -66,13 +66,6 @@ do { \
__flush_tlb_one((vaddr)); \
} while (0)
-/*
- * The i386 doesn't have any external MMU info: the kernel page
- * tables contain all the necessary information.
- */
-#define update_mmu_cache(vma, address, ptep) do { } while (0)
-#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
-
#endif /* !__ASSEMBLY__ */
/*
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 47356f9df82..e22c1dbf7fe 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -142,9 +142,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
#define pte_unmap(pte) ((void)(pte))/* NOP */
-#define update_mmu_cache(vma, address, ptep) do { } while (0)
-#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
-
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
@@ -183,6 +180,11 @@ extern void cleanup_highmap(void);
#define __HAVE_ARCH_PTE_SAME
+#define vmemmap ((struct page *)VMEMMAP_START)
+
+extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
+extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 766ea16fbbb..2d883440cb9 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_PGTABLE_64_DEFS_H
#define _ASM_X86_PGTABLE_64_DEFS_H
+#include <asm/sparsemem.h>
+
#ifndef __ASSEMBLY__
#include <linux/types.h>
@@ -60,4 +62,6 @@ typedef struct { pteval_t pte; } pte_t;
#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
+#define EARLY_DYNAMIC_PAGE_TABLES 64
+
#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 3c32db8c539..567b5d0632b 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -321,7 +321,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);
-extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
extern void native_pagetable_init(void);
#else
@@ -331,7 +330,7 @@ extern void native_pagetable_init(void);
struct seq_file;
extern void arch_report_meminfo(struct seq_file *m);
-enum {
+enum pg_level {
PG_LEVEL_NONE,
PG_LEVEL_4K,
PG_LEVEL_2M,
@@ -352,6 +351,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
* as a pte too.
*/
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
+extern int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase);
+extern phys_addr_t slow_virt_to_phys(void *__address);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 888184b2fc8..3270116b148 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -89,7 +89,6 @@ struct cpuinfo_x86 {
char wp_works_ok; /* It doesn't on 386's */
/* Problems on some 486Dx4's and old 386's: */
- char hlt_works_ok;
char hard_math;
char rfu;
char fdiv_bug;
@@ -165,15 +164,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
extern const struct seq_operations cpuinfo_op;
-static inline int hlt_works(int cpu)
-{
-#ifdef CONFIG_X86_32
- return cpu_data(cpu).hlt_works_ok;
-#else
- return 1;
-#endif
-}
-
#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
extern void cpu_detect(struct cpuinfo_x86 *c);
@@ -190,6 +180,14 @@ extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
extern void detect_extended_topology(struct cpuinfo_x86 *c);
extern void detect_ht(struct cpuinfo_x86 *c);
+#ifdef CONFIG_X86_32
+extern int have_cpuid_p(void);
+#else
+static inline int have_cpuid_p(void)
+{
+ return 1;
+}
+#endif
static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
@@ -725,12 +723,13 @@ extern unsigned long boot_option_idle_override;
extern bool amd_e400_c1e_detected;
enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
- IDLE_POLL, IDLE_FORCE_MWAIT};
+ IDLE_POLL};
extern void enable_sep_cpu(void);
extern int sysenter_setup(void);
extern void early_trap_init(void);
+void early_trap_pf_init(void);
/* Defined in head.S */
extern struct desc_ptr early_gdt_descr;
@@ -943,7 +942,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);
-extern int amd_get_nb_id(int cpu);
+extern u16 amd_get_nb_id(int cpu);
struct aperfmperf {
u64 aperf, mperf;
@@ -998,7 +997,11 @@ extern unsigned long arch_align_stack(unsigned long sp);
extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
void default_idle(void);
-bool set_pm_idle_to_default(void);
+#ifdef CONFIG_XEN
+bool xen_set_default_idle(void);
+#else
+#define xen_set_default_idle 0
+#endif
void stop_this_cpu(void *dummy);
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 6f414ed8862..6fd3fd76979 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -5,8 +5,6 @@
/* misc architecture specific prototypes */
-void early_idt_handler(void);
-
void system_call(void);
void syscall_init(void);
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index fe1ec5bcd84..9c6b890d5e7 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -58,6 +58,7 @@ extern unsigned char boot_gdt[];
extern unsigned char secondary_startup_64[];
#endif
-extern void __init setup_real_mode(void);
+void reserve_real_mode(void);
+void setup_real_mode(void);
#endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 6c7fc25f2c3..5c6e4fb370f 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -47,6 +47,12 @@
# define NEED_NOPL 0
#endif
+#ifdef CONFIG_MATOM
+# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
+#else
+# define NEED_MOVBE 0
+#endif
+
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */
@@ -80,7 +86,7 @@
#define REQUIRED_MASK2 0
#define REQUIRED_MASK3 (NEED_NOPL)
-#define REQUIRED_MASK4 0
+#define REQUIRED_MASK4 (NEED_MOVBE)
#define REQUIRED_MASK5 0
#define REQUIRED_MASK6 0
#define REQUIRED_MASK7 0
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 216bf364a7e..35e67a45718 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,27 +31,9 @@ typedef sigset_t compat_sigset_t;
#include <uapi/asm/signal.h>
#ifndef __ASSEMBLY__
extern void do_notify_resume(struct pt_regs *, void *, __u32);
-#ifdef __i386__
-struct old_sigaction {
- __sighandler_t sa_handler;
- old_sigset_t sa_mask;
- unsigned long sa_flags;
- __sigrestore_t sa_restorer;
-};
-
-struct sigaction {
- __sighandler_t sa_handler;
- unsigned long sa_flags;
- __sigrestore_t sa_restorer;
- sigset_t sa_mask; /* mask last for extensibility */
-};
-
-struct k_sigaction {
- struct sigaction sa;
-};
-#else /* __i386__ */
-#endif /* !__i386__ */
+#define __ARCH_HAS_SA_RESTORER
+
#include <asm/sigcontext.h>
#ifdef __i386__
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 31f61f96e0f..0218d917f50 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -32,22 +32,11 @@ struct mmap_arg_struct32;
asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *);
asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
-struct sigaction32;
-struct old_sigaction32;
-asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
- struct sigaction32 __user *, unsigned int);
-asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
- struct old_sigaction32 __user *);
asmlinkage long sys32_alarm(unsigned int);
asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
asmlinkage long sys32_sysfs(int, u32, u32);
-asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
- struct compat_timespec __user *);
-asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
-asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
-
asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
@@ -68,9 +57,8 @@ asmlinkage long sys32_fallocate(int, int, unsigned,
unsigned, unsigned, unsigned);
/* ia32/ia32_signal.c */
-asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
-asmlinkage long sys32_sigreturn(struct pt_regs *);
-asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
+asmlinkage long sys32_sigreturn(void);
+asmlinkage long sys32_rt_sigreturn(void);
/* ia32/ipc32.c */
asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 58b7e3eac0a..6cf0a9cc60c 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -18,13 +18,13 @@
/* Common in X86_32 and X86_64 */
/* kernel/ioport.c */
asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
-long sys_iopl(unsigned int, struct pt_regs *);
+asmlinkage long sys_iopl(unsigned int);
/* kernel/ldt.c */
asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
/* kernel/signal.c */
-long sys_rt_sigreturn(struct pt_regs *);
+long sys_rt_sigreturn(void);
/* kernel/tls.c */
asmlinkage int sys_set_thread_area(struct user_desc __user *);
@@ -34,14 +34,11 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
#ifdef CONFIG_X86_32
/* kernel/signal.c */
-asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
-asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
- struct old_sigaction __user *);
-unsigned long sys_sigreturn(struct pt_regs *);
+unsigned long sys_sigreturn(void);
/* kernel/vm86_32.c */
-int sys_vm86old(struct vm86_struct __user *, struct pt_regs *);
-int sys_vm86(unsigned long, unsigned long, struct pt_regs *);
+int sys_vm86old(struct vm86_struct __user *);
+int sys_vm86(unsigned long, unsigned long);
#else /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0fee48e279c..50a7fc0f824 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -20,10 +20,20 @@ static inline void __native_flush_tlb(void)
native_write_cr3(native_read_cr3());
}
+static inline void __native_flush_tlb_global_irq_disabled(void)
+{
+ unsigned long cr4;
+
+ cr4 = native_read_cr4();
+ /* clear PGE */
+ native_write_cr4(cr4 & ~X86_CR4_PGE);
+ /* write old PGE again and flush TLBs */
+ native_write_cr4(cr4);
+}
+
static inline void __native_flush_tlb_global(void)
{
unsigned long flags;
- unsigned long cr4;
/*
* Read-modify-write to CR4 - protect it from preemption and
@@ -32,11 +42,7 @@ static inline void __native_flush_tlb_global(void)
*/
raw_local_irq_save(flags);
- cr4 = native_read_cr4();
- /* clear PGE */
- native_write_cr4(cr4 & ~X86_CR4_PGE);
- /* write old PGE again and flush TLBs */
- native_write_cr4(cr4);
+ __native_flush_tlb_global_irq_disabled();
raw_local_irq_restore(flags);
}
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 1709801d18e..5ee26875bae 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -125,13 +125,12 @@ extern int __get_user_4(void);
extern int __get_user_8(void);
extern int __get_user_bad(void);
-#define __get_user_x(size, ret, x, ptr) \
- asm volatile("call __get_user_" #size \
- : "=a" (ret), "=d" (x) \
- : "0" (ptr)) \
-
-/* Careful: we have to cast the result to the type of the pointer
- * for sign reasons */
+/*
+ * This is a type: either unsigned long, if the argument fits into
+ * that type, or otherwise unsigned long long.
+ */
+#define __inttype(x) \
+__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
/**
* get_user: - Get a simple variable from user space.
@@ -150,38 +149,26 @@ extern int __get_user_bad(void);
* Returns zero on success, or -EFAULT on error.
* On error, the variable @x is set to zero.
*/
-#ifdef CONFIG_X86_32
-#define __get_user_8(__ret_gu, __val_gu, ptr) \
- __get_user_x(X, __ret_gu, __val_gu, ptr)
-#else
-#define __get_user_8(__ret_gu, __val_gu, ptr) \
- __get_user_x(8, __ret_gu, __val_gu, ptr)
-#endif
-
+/*
+ * Careful: we have to cast the result to the type of the pointer
+ * for sign reasons.
+ *
+ * The use of %edx as the register specifier is a bit of a
+ * simplification, as gcc only cares about it as the starting point
+ * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits
+ * (%ecx being the next register in gcc's x86 register sequence), and
+ * %rdx on 64 bits.
+ */
#define get_user(x, ptr) \
({ \
int __ret_gu; \
- unsigned long __val_gu; \
+ register __inttype(*(ptr)) __val_gu asm("%edx"); \
__chk_user_ptr(ptr); \
might_fault(); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- __get_user_x(1, __ret_gu, __val_gu, ptr); \
- break; \
- case 2: \
- __get_user_x(2, __ret_gu, __val_gu, ptr); \
- break; \
- case 4: \
- __get_user_x(4, __ret_gu, __val_gu, ptr); \
- break; \
- case 8: \
- __get_user_8(__ret_gu, __val_gu, ptr); \
- break; \
- default: \
- __get_user_x(X, __ret_gu, __val_gu, ptr); \
- break; \
- } \
- (x) = (__typeof__(*(ptr)))__val_gu; \
+ asm volatile("call __get_user_%P3" \
+ : "=a" (__ret_gu), "=r" (__val_gu) \
+ : "0" (ptr), "i" (sizeof(*(ptr)))); \
+ (x) = (__typeof__(*(ptr))) __val_gu; \
__ret_gu; \
})
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index a0790e07ba6..3d5df1c4447 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -38,8 +38,6 @@
# define __ARCH_WANT_SYS_OLD_GETRLIMIT
# define __ARCH_WANT_SYS_OLD_UNAME
# define __ARCH_WANT_SYS_PAUSE
-# define __ARCH_WANT_SYS_RT_SIGACTION
-# define __ARCH_WANT_SYS_RT_SIGSUSPEND
# define __ARCH_WANT_SYS_SGETMASK
# define __ARCH_WANT_SYS_SIGNAL
# define __ARCH_WANT_SYS_SIGPENDING
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index b47c2a82ff1..062921ef34e 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -16,7 +16,7 @@ extern void uv_system_init(void);
extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long start,
- unsigned end,
+ unsigned long end,
unsigned int cpu);
#else /* X86_UV */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 21f7385badb..2c32df95bb7 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -5,7 +5,7 @@
*
* SGI UV architectural definitions
*
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
*/
#ifndef _ASM_X86_UV_UV_HUB_H
@@ -175,6 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
*/
#define UV1_HUB_REVISION_BASE 1
#define UV2_HUB_REVISION_BASE 3
+#define UV3_HUB_REVISION_BASE 5
static inline int is_uv1_hub(void)
{
@@ -183,6 +184,23 @@ static inline int is_uv1_hub(void)
static inline int is_uv2_hub(void)
{
+ return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
+ (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
+}
+
+static inline int is_uv3_hub(void)
+{
+ return uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE;
+}
+
+static inline int is_uv_hub(void)
+{
+ return uv_hub_info->hub_revision;
+}
+
+/* code common to uv2 and uv3 only */
+static inline int is_uvx_hub(void)
+{
return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE;
}
@@ -230,14 +248,23 @@ union uvh_apicid {
#define UV2_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
#define UV2_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024)
-#define UV_LOCAL_MMR_BASE (is_uv1_hub() ? UV1_LOCAL_MMR_BASE \
- : UV2_LOCAL_MMR_BASE)
-#define UV_GLOBAL_MMR32_BASE (is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE \
- : UV2_GLOBAL_MMR32_BASE)
-#define UV_LOCAL_MMR_SIZE (is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \
- UV2_LOCAL_MMR_SIZE)
+#define UV3_LOCAL_MMR_BASE 0xfa000000UL
+#define UV3_GLOBAL_MMR32_BASE 0xfc000000UL
+#define UV3_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
+#define UV3_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024)
+
+#define UV_LOCAL_MMR_BASE (is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \
+ (is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
+ UV3_LOCAL_MMR_BASE))
+#define UV_GLOBAL_MMR32_BASE (is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE :\
+ (is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE :\
+ UV3_GLOBAL_MMR32_BASE))
+#define UV_LOCAL_MMR_SIZE (is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \
+ (is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
+ UV3_LOCAL_MMR_SIZE))
#define UV_GLOBAL_MMR32_SIZE (is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\
- UV2_GLOBAL_MMR32_SIZE)
+ (is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE :\
+ UV3_GLOBAL_MMR32_SIZE))
#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base)
#define UV_GLOBAL_GRU_MMR_BASE 0x4000000
@@ -599,6 +626,7 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
* 1 - UV1 rev 1.0 initial silicon
* 2 - UV1 rev 2.0 production silicon
* 3 - UV2 rev 1.0 initial silicon
+ * 5 - UV3 rev 1.0 initial silicon
*/
static inline int uv_get_min_hub_revision_id(void)
{
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index cf1d73643f6..bd5f80e58a2 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,16 +5,25 @@
*
* SGI UV MMR definitions
*
- * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
*/
#ifndef _ASM_X86_UV_UV_MMRS_H
#define _ASM_X86_UV_UV_MMRS_H
/*
- * This file contains MMR definitions for both UV1 & UV2 hubs.
+ * This file contains MMR definitions for all UV hubs types.
*
- * In general, MMR addresses and structures are identical on both hubs.
+ * To minimize coding differences between hub types, the symbols are
+ * grouped by architecture types.
+ *
+ * UVH - definitions common to all UV hub types.
+ * UVXH - definitions common to all UV eXtended hub types (currently 2 & 3).
+ * UV1H - definitions specific to UV type 1 hub.
+ * UV2H - definitions specific to UV type 2 hub.
+ * UV3H - definitions specific to UV type 3 hub.
+ *
+ * So in general, MMR addresses and structures are identical on all hubs types.
* These MMRs are identified as:
* #define UVH_xxx <address>
* union uvh_xxx {
@@ -23,24 +32,36 @@
* } s;
* };
*
- * If the MMR exists on both hub type but has different addresses or
- * contents, the MMR definition is similar to:
- * #define UV1H_xxx <uv1 address>
- * #define UV2H_xxx <uv2address>
- * #define UVH_xxx (is_uv1_hub() ? UV1H_xxx : UV2H_xxx)
+ * If the MMR exists on all hub types but have different addresses:
+ * #define UV1Hxxx a
+ * #define UV2Hxxx b
+ * #define UV3Hxxx c
+ * #define UVHxxx (is_uv1_hub() ? UV1Hxxx :
+ * (is_uv2_hub() ? UV2Hxxx :
+ * UV3Hxxx))
+ *
+ * If the MMR exists on all hub types > 1 but have different addresses:
+ * #define UV2Hxxx b
+ * #define UV3Hxxx c
+ * #define UVXHxxx (is_uv2_hub() ? UV2Hxxx :
+ * UV3Hxxx))
+ *
* union uvh_xxx {
* unsigned long v;
- * struct uv1h_int_cmpd_s { (Common fields only)
+ * struct uvh_xxx_s { # Common fields only
* } s;
- * struct uv1h_int_cmpd_s { (Full UV1 definition)
+ * struct uv1h_xxx_s { # Full UV1 definition (*)
* } s1;
- * struct uv2h_int_cmpd_s { (Full UV2 definition)
+ * struct uv2h_xxx_s { # Full UV2 definition (*)
* } s2;
+ * struct uv3h_xxx_s { # Full UV3 definition (*)
+ * } s3;
* };
+ * (* - if present and different than the common struct)
*
- * Only essential difference are enumerated. For example, if the address is
- * the same for both UV1 & UV2, only a single #define is generated. Likewise,
- * if the contents is the same for both hubs, only the "s" structure is
+ * Only essential differences are enumerated. For example, if the address is
+ * the same for all UV's, only a single #define is generated. Likewise,
+ * if the contents is the same for all hubs, only the "s" structure is
* generated.
*
* If the MMR exists on ONLY 1 type of hub, no generic definition is
@@ -51,6 +72,8 @@
* struct uvh_int_cmpd_s {
* } sn;
* };
+ *
+ * (GEN Flags: mflags_opt= undefs=0 UV23=UVXH)
*/
#define UV_MMR_ENABLE (1UL << 63)
@@ -58,15 +81,18 @@
#define UV1_HUB_PART_NUMBER 0x88a5
#define UV2_HUB_PART_NUMBER 0x8eb8
#define UV2_HUB_PART_NUMBER_X 0x1111
+#define UV3_HUB_PART_NUMBER 0x9578
+#define UV3_HUB_PART_NUMBER_X 0x4321
-/* Compat: if this #define is present, UV headers support UV2 */
+/* Compat: Indicate which UV Hubs are supported. */
#define UV2_HUB_IS_SUPPORTED 1
+#define UV3_HUB_IS_SUPPORTED 1
/* ========================================================================= */
/* UVH_BAU_DATA_BROADCAST */
/* ========================================================================= */
-#define UVH_BAU_DATA_BROADCAST 0x61688UL
-#define UVH_BAU_DATA_BROADCAST_32 0x440
+#define UVH_BAU_DATA_BROADCAST 0x61688UL
+#define UVH_BAU_DATA_BROADCAST_32 0x440
#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0
#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL
@@ -82,8 +108,8 @@ union uvh_bau_data_broadcast_u {
/* ========================================================================= */
/* UVH_BAU_DATA_CONFIG */
/* ========================================================================= */
-#define UVH_BAU_DATA_CONFIG 0x61680UL
-#define UVH_BAU_DATA_CONFIG_32 0x438
+#define UVH_BAU_DATA_CONFIG 0x61680UL
+#define UVH_BAU_DATA_CONFIG_32 0x438
#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
#define UVH_BAU_DATA_CONFIG_DM_SHFT 8
@@ -121,10 +147,14 @@ union uvh_bau_data_config_u {
/* ========================================================================= */
/* UVH_EVENT_OCCURRED0 */
/* ========================================================================= */
-#define UVH_EVENT_OCCURRED0 0x70000UL
-#define UVH_EVENT_OCCURRED0_32 0x5e8
+#define UVH_EVENT_OCCURRED0 0x70000UL
+#define UVH_EVENT_OCCURRED0_32 0x5e8
+
+#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0
+#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
+#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
+#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
-#define UV1H_EVENT_OCCURRED0_LB_HCERR_SHFT 0
#define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1
#define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2
#define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3
@@ -135,7 +165,6 @@ union uvh_bau_data_config_u {
#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8
#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9
#define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10
-#define UV1H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
#define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12
#define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13
#define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14
@@ -181,7 +210,6 @@ union uvh_bau_data_config_u {
#define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54
#define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55
#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56
-#define UV1H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
#define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL
#define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL
#define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL
@@ -192,7 +220,6 @@ union uvh_bau_data_config_u {
#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL
#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL
#define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL
-#define UV1H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
#define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL
#define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL
#define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL
@@ -239,188 +266,130 @@ union uvh_bau_data_config_u {
#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL
#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_HCERR_SHFT 0
-#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
-#define UV2H_EVENT_OCCURRED0_RH_HCERR_SHFT 2
-#define UV2H_EVENT_OCCURRED0_LH0_HCERR_SHFT 3
-#define UV2H_EVENT_OCCURRED0_LH1_HCERR_SHFT 4
-#define UV2H_EVENT_OCCURRED0_GR0_HCERR_SHFT 5
-#define UV2H_EVENT_OCCURRED0_GR1_HCERR_SHFT 6
-#define UV2H_EVENT_OCCURRED0_NI0_HCERR_SHFT 7
-#define UV2H_EVENT_OCCURRED0_NI1_HCERR_SHFT 8
-#define UV2H_EVENT_OCCURRED0_LB_AOERR0_SHFT 9
-#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
-#define UV2H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
-#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12
-#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13
-#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14
-#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15
-#define UV2H_EVENT_OCCURRED0_XB_AOERR0_SHFT 16
-#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
-#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
-#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
-#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
-#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
-#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
-#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
-#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
-#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
-#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
-#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
-#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
-#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
-#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
-#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
-#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
-#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
-#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
-#define UV2H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
-#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
-#define UV2H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL
-#define UV2H_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL
-#define UV2H_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL
-#define UV2H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL
-#define UV2H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL
-#define UV2H_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL
-#define UV2H_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL
-#define UV2H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL
-#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
-#define UV2H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
-#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL
-#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL
-#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL
-#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL
-#define UV2H_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL
-#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
-#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
-#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
-#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
-#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
-#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
-#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
-#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
-#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
-#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
-#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
-#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
-#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
-#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
-#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
-#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
-#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
-#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
+#define UVXH_EVENT_OCCURRED0_QP_HCERR_SHFT 1
+#define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2
+#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3
+#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4
+#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5
+#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6
+#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7
+#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8
+#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9
+#define UVXH_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
+#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12
+#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13
+#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14
+#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15
+#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16
+#define UVXH_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
+#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
+#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
+#define UVXH_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
+#define UVXH_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
+#define UVXH_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
+#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
+#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
+#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
+#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
+#define UVXH_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
+#define UVXH_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
+#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
+#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
+#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
+#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
+#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
+#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
+#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
+#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
+#define UVXH_EVENT_OCCURRED0_IPI_INT_SHFT 53
+#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
+#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
+#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
+#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
+#define UVXH_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
+#define UVXH_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
+#define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL
+#define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL
+#define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL
+#define UVXH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL
+#define UVXH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL
+#define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL
+#define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL
+#define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL
+#define UVXH_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
+#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL
+#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL
+#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL
+#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL
+#define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL
+#define UVXH_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
+#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
+#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
+#define UVXH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
+#define UVXH_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
+#define UVXH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
+#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
+#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
+#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
+#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
+#define UVXH_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
+#define UVXH_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
+#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
+#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
+#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
+#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
+#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
+#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
+#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
+#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
+#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
+#define UVXH_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
+#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
+#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
+#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
+#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
+#define UVXH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
union uvh_event_occurred0_u {
unsigned long v;
- struct uv1h_event_occurred0_s {
+ struct uvh_event_occurred0_s {
unsigned long lb_hcerr:1; /* RW, W1C */
- unsigned long gr0_hcerr:1; /* RW, W1C */
- unsigned long gr1_hcerr:1; /* RW, W1C */
- unsigned long lh_hcerr:1; /* RW, W1C */
- unsigned long rh_hcerr:1; /* RW, W1C */
- unsigned long xn_hcerr:1; /* RW, W1C */
- unsigned long si_hcerr:1; /* RW, W1C */
- unsigned long lb_aoerr0:1; /* RW, W1C */
- unsigned long gr0_aoerr0:1; /* RW, W1C */
- unsigned long gr1_aoerr0:1; /* RW, W1C */
- unsigned long lh_aoerr0:1; /* RW, W1C */
+ unsigned long rsvd_1_10:10;
unsigned long rh_aoerr0:1; /* RW, W1C */
- unsigned long xn_aoerr0:1; /* RW, W1C */
- unsigned long si_aoerr0:1; /* RW, W1C */
- unsigned long lb_aoerr1:1; /* RW, W1C */
- unsigned long gr0_aoerr1:1; /* RW, W1C */
- unsigned long gr1_aoerr1:1; /* RW, W1C */
- unsigned long lh_aoerr1:1; /* RW, W1C */
- unsigned long rh_aoerr1:1; /* RW, W1C */
- unsigned long xn_aoerr1:1; /* RW, W1C */
- unsigned long si_aoerr1:1; /* RW, W1C */
- unsigned long rh_vpi_int:1; /* RW, W1C */
- unsigned long system_shutdown_int:1; /* RW, W1C */
- unsigned long lb_irq_int_0:1; /* RW, W1C */
- unsigned long lb_irq_int_1:1; /* RW, W1C */
- unsigned long lb_irq_int_2:1; /* RW, W1C */
- unsigned long lb_irq_int_3:1; /* RW, W1C */
- unsigned long lb_irq_int_4:1; /* RW, W1C */
- unsigned long lb_irq_int_5:1; /* RW, W1C */
- unsigned long lb_irq_int_6:1; /* RW, W1C */
- unsigned long lb_irq_int_7:1; /* RW, W1C */
- unsigned long lb_irq_int_8:1; /* RW, W1C */
- unsigned long lb_irq_int_9:1; /* RW, W1C */
- unsigned long lb_irq_int_10:1; /* RW, W1C */
- unsigned long lb_irq_int_11:1; /* RW, W1C */
- unsigned long lb_irq_int_12:1; /* RW, W1C */
- unsigned long lb_irq_int_13:1; /* RW, W1C */
- unsigned long lb_irq_int_14:1; /* RW, W1C */
- unsigned long lb_irq_int_15:1; /* RW, W1C */
- unsigned long l1_nmi_int:1; /* RW, W1C */
- unsigned long stop_clock:1; /* RW, W1C */
- unsigned long asic_to_l1:1; /* RW, W1C */
- unsigned long l1_to_asic:1; /* RW, W1C */
- unsigned long ltc_int:1; /* RW, W1C */
- unsigned long la_seq_trigger:1; /* RW, W1C */
- unsigned long ipi_int:1; /* RW, W1C */
- unsigned long extio_int0:1; /* RW, W1C */
- unsigned long extio_int1:1; /* RW, W1C */
- unsigned long extio_int2:1; /* RW, W1C */
- unsigned long extio_int3:1; /* RW, W1C */
- unsigned long profile_int:1; /* RW, W1C */
- unsigned long rtc0:1; /* RW, W1C */
- unsigned long rtc1:1; /* RW, W1C */
- unsigned long rtc2:1; /* RW, W1C */
- unsigned long rtc3:1; /* RW, W1C */
- unsigned long bau_data:1; /* RW, W1C */
- unsigned long power_management_req:1; /* RW, W1C */
- unsigned long rsvd_57_63:7;
- } s1;
- struct uv2h_event_occurred0_s {
+ unsigned long rsvd_12_63:52;
+ } s;
+ struct uvxh_event_occurred0_s {
unsigned long lb_hcerr:1; /* RW */
unsigned long qp_hcerr:1; /* RW */
unsigned long rh_hcerr:1; /* RW */
@@ -481,19 +450,20 @@ union uvh_event_occurred0_u {
unsigned long extio_int3:1; /* RW */
unsigned long profile_int:1; /* RW */
unsigned long rsvd_59_63:5;
- } s2;
+ } sx;
};
/* ========================================================================= */
/* UVH_EVENT_OCCURRED0_ALIAS */
/* ========================================================================= */
-#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL
-#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0
+#define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL
+#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0
+
/* ========================================================================= */
/* UVH_GR0_TLB_INT0_CONFIG */
/* ========================================================================= */
-#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
+#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
@@ -531,7 +501,7 @@ union uvh_gr0_tlb_int0_config_u {
/* ========================================================================= */
/* UVH_GR0_TLB_INT1_CONFIG */
/* ========================================================================= */
-#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
+#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
@@ -571,9 +541,11 @@ union uvh_gr0_tlb_int1_config_u {
/* ========================================================================= */
#define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL
#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL
-#define UVH_GR0_TLB_MMR_CONTROL (is_uv1_hub() ? \
- UV1H_GR0_TLB_MMR_CONTROL : \
- UV2H_GR0_TLB_MMR_CONTROL)
+#define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL
+#define UVH_GR0_TLB_MMR_CONTROL \
+ (is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL : \
+ (is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \
+ UV3H_GR0_TLB_MMR_CONTROL))
#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
@@ -611,6 +583,21 @@ union uvh_gr0_tlb_int1_config_u {
#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL
#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL
+#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
+#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
+#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
+#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
+#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
+#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
+#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
+#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
+#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
+#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
+#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
+#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
+#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
+#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
+
#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
@@ -630,6 +617,23 @@ union uvh_gr0_tlb_int1_config_u {
#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
+#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
+#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
+#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
+#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
+#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
+#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
+#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
+#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
+#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
+#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
+#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
+#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
+#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
+#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
+#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
+#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
+
union uvh_gr0_tlb_mmr_control_u {
unsigned long v;
struct uvh_gr0_tlb_mmr_control_s {
@@ -642,7 +646,9 @@ union uvh_gr0_tlb_mmr_control_u {
unsigned long rsvd_21_29:9;
unsigned long mmr_write:1; /* WP */
unsigned long mmr_read:1; /* WP */
- unsigned long rsvd_32_63:32;
+ unsigned long rsvd_32_48:17;
+ unsigned long rsvd_49_51:3;
+ unsigned long rsvd_52_63:12;
} s;
struct uv1h_gr0_tlb_mmr_control_s {
unsigned long index:12; /* RW */
@@ -666,6 +672,23 @@ union uvh_gr0_tlb_mmr_control_u {
unsigned long mmr_inj_tlblruv:1; /* RW */
unsigned long rsvd_61_63:3;
} s1;
+ struct uvxh_gr0_tlb_mmr_control_s {
+ unsigned long index:12; /* RW */
+ unsigned long mem_sel:2; /* RW */
+ unsigned long rsvd_14_15:2;
+ unsigned long auto_valid_en:1; /* RW */
+ unsigned long rsvd_17_19:3;
+ unsigned long mmr_hash_index_en:1; /* RW */
+ unsigned long rsvd_21_29:9;
+ unsigned long mmr_write:1; /* WP */
+ unsigned long mmr_read:1; /* WP */
+ unsigned long mmr_op_done:1; /* RW */
+ unsigned long rsvd_33_47:15;
+ unsigned long rsvd_48:1;
+ unsigned long rsvd_49_51:3;
+ unsigned long rsvd_52:1;
+ unsigned long rsvd_53_63:11;
+ } sx;
struct uv2h_gr0_tlb_mmr_control_s {
unsigned long index:12; /* RW */
unsigned long mem_sel:2; /* RW */
@@ -683,6 +706,24 @@ union uvh_gr0_tlb_mmr_control_u {
unsigned long mmr_inj_tlbram:1; /* RW */
unsigned long rsvd_53_63:11;
} s2;
+ struct uv3h_gr0_tlb_mmr_control_s {
+ unsigned long index:12; /* RW */
+ unsigned long mem_sel:2; /* RW */
+ unsigned long rsvd_14_15:2;
+ unsigned long auto_valid_en:1; /* RW */
+ unsigned long rsvd_17_19:3;
+ unsigned long mmr_hash_index_en:1; /* RW */
+ unsigned long ecc_sel:1; /* RW */
+ unsigned long rsvd_22_29:8;
+ unsigned long mmr_write:1; /* WP */
+ unsigned long mmr_read:1; /* WP */
+ unsigned long mmr_op_done:1; /* RW */
+ unsigned long rsvd_33_47:15;
+ unsigned long undef_48:1; /* Undefined */
+ unsigned long rsvd_49_51:3;
+ unsigned long undef_52:1; /* Undefined */
+ unsigned long rsvd_53_63:11;
+ } s3;
};
/* ========================================================================= */
@@ -690,9 +731,11 @@ union uvh_gr0_tlb_mmr_control_u {
/* ========================================================================= */
#define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL
#define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI (is_uv1_hub() ? \
- UV1H_GR0_TLB_MMR_READ_DATA_HI : \
- UV2H_GR0_TLB_MMR_READ_DATA_HI)
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
+#define UVH_GR0_TLB_MMR_READ_DATA_HI \
+ (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI : \
+ (is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI : \
+ UV3H_GR0_TLB_MMR_READ_DATA_HI))
#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
#define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
@@ -703,6 +746,46 @@ union uvh_gr0_tlb_mmr_control_u {
#define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
#define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
+#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
+#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
+#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
+#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
+#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
+#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
+#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
+#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
+
+#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
+#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
+#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
+#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
+#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
+#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
+#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
+#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
+
+#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
+#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
+#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
+#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
+#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
+#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
+#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
+#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
+
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
+#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
+
union uvh_gr0_tlb_mmr_read_data_hi_u {
unsigned long v;
struct uvh_gr0_tlb_mmr_read_data_hi_s {
@@ -712,6 +795,36 @@ union uvh_gr0_tlb_mmr_read_data_hi_u {
unsigned long larger:1; /* RO */
unsigned long rsvd_45_63:19;
} s;
+ struct uv1h_gr0_tlb_mmr_read_data_hi_s {
+ unsigned long pfn:41; /* RO */
+ unsigned long gaa:2; /* RO */
+ unsigned long dirty:1; /* RO */
+ unsigned long larger:1; /* RO */
+ unsigned long rsvd_45_63:19;
+ } s1;
+ struct uvxh_gr0_tlb_mmr_read_data_hi_s {
+ unsigned long pfn:41; /* RO */
+ unsigned long gaa:2; /* RO */
+ unsigned long dirty:1; /* RO */
+ unsigned long larger:1; /* RO */
+ unsigned long rsvd_45_63:19;
+ } sx;
+ struct uv2h_gr0_tlb_mmr_read_data_hi_s {
+ unsigned long pfn:41; /* RO */
+ unsigned long gaa:2; /* RO */
+ unsigned long dirty:1; /* RO */
+ unsigned long larger:1; /* RO */
+ unsigned long rsvd_45_63:19;
+ } s2;
+ struct uv3h_gr0_tlb_mmr_read_data_hi_s {
+ unsigned long pfn:41; /* RO */
+ unsigned long gaa:2; /* RO */
+ unsigned long dirty:1; /* RO */
+ unsigned long larger:1; /* RO */
+ unsigned long aa_ext:1; /* RO */
+ unsigned long undef_46_54:9; /* Undefined */
+ unsigned long way_ecc:9; /* RO */
+ } s3;
};
/* ========================================================================= */
@@ -719,9 +832,11 @@ union uvh_gr0_tlb_mmr_read_data_hi_u {
/* ========================================================================= */
#define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL
#define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO (is_uv1_hub() ? \
- UV1H_GR0_TLB_MMR_READ_DATA_LO : \
- UV2H_GR0_TLB_MMR_READ_DATA_LO)
+#define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
+#define UVH_GR0_TLB_MMR_READ_DATA_LO \
+ (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO : \
+ (is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \
+ UV3H_GR0_TLB_MMR_READ_DATA_LO))
#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
@@ -730,6 +845,34 @@ union uvh_gr0_tlb_mmr_read_data_hi_u {
#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
+#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
+#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
+#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
+#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
+#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+
+#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
+#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
+#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
+#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
+#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
+#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+
+#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
+#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
+#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
+#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
+#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
+#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+
+#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
+#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
+#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
+#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
+#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
+#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+
union uvh_gr0_tlb_mmr_read_data_lo_u {
unsigned long v;
struct uvh_gr0_tlb_mmr_read_data_lo_s {
@@ -737,12 +880,32 @@ union uvh_gr0_tlb_mmr_read_data_lo_u {
unsigned long asid:24; /* RO */
unsigned long valid:1; /* RO */
} s;
+ struct uv1h_gr0_tlb_mmr_read_data_lo_s {
+ unsigned long vpn:39; /* RO */
+ unsigned long asid:24; /* RO */
+ unsigned long valid:1; /* RO */
+ } s1;
+ struct uvxh_gr0_tlb_mmr_read_data_lo_s {
+ unsigned long vpn:39; /* RO */
+ unsigned long asid:24; /* RO */
+ unsigned long valid:1; /* RO */
+ } sx;
+ struct uv2h_gr0_tlb_mmr_read_data_lo_s {
+ unsigned long vpn:39; /* RO */
+ unsigned long asid:24; /* RO */
+ unsigned long valid:1; /* RO */
+ } s2;
+ struct uv3h_gr0_tlb_mmr_read_data_lo_s {
+ unsigned long vpn:39; /* RO */
+ unsigned long asid:24; /* RO */
+ unsigned long valid:1; /* RO */
+ } s3;
};
/* ========================================================================= */
/* UVH_GR1_TLB_INT0_CONFIG */
/* ========================================================================= */
-#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL
+#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL
#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
@@ -780,7 +943,7 @@ union uvh_gr1_tlb_int0_config_u {
/* ========================================================================= */
/* UVH_GR1_TLB_INT1_CONFIG */
/* ========================================================================= */
-#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL
+#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL
#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
@@ -820,9 +983,11 @@ union uvh_gr1_tlb_int1_config_u {
/* ========================================================================= */
#define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL
#define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL
-#define UVH_GR1_TLB_MMR_CONTROL (is_uv1_hub() ? \
- UV1H_GR1_TLB_MMR_CONTROL : \
- UV2H_GR1_TLB_MMR_CONTROL)
+#define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL
+#define UVH_GR1_TLB_MMR_CONTROL \
+ (is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL : \
+ (is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \
+ UV3H_GR1_TLB_MMR_CONTROL))
#define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
#define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
@@ -860,6 +1025,21 @@ union uvh_gr1_tlb_int1_config_u {
#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL
#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL
+#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
+#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
+#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
+#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
+#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
+#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
+#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
+#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
+#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
+#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
+#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
+#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
+#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
+#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
+
#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
@@ -879,6 +1059,23 @@ union uvh_gr1_tlb_int1_config_u {
#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
+#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
+#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
+#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
+#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
+#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
+#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
+#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
+#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
+#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
+#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
+#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
+#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
+#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
+#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
+#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
+#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
+
union uvh_gr1_tlb_mmr_control_u {
unsigned long v;
struct uvh_gr1_tlb_mmr_control_s {
@@ -891,7 +1088,9 @@ union uvh_gr1_tlb_mmr_control_u {
unsigned long rsvd_21_29:9;
unsigned long mmr_write:1; /* WP */
unsigned long mmr_read:1; /* WP */
- unsigned long rsvd_32_63:32;
+ unsigned long rsvd_32_48:17;
+ unsigned long rsvd_49_51:3;
+ unsigned long rsvd_52_63:12;
} s;
struct uv1h_gr1_tlb_mmr_control_s {
unsigned long index:12; /* RW */
@@ -915,6 +1114,23 @@ union uvh_gr1_tlb_mmr_control_u {
unsigned long mmr_inj_tlblruv:1; /* RW */
unsigned long rsvd_61_63:3;
} s1;
+ struct uvxh_gr1_tlb_mmr_control_s {
+ unsigned long index:12; /* RW */
+ unsigned long mem_sel:2; /* RW */
+ unsigned long rsvd_14_15:2;
+ unsigned long auto_valid_en:1; /* RW */
+ unsigned long rsvd_17_19:3;
+ unsigned long mmr_hash_index_en:1; /* RW */
+ unsigned long rsvd_21_29:9;
+ unsigned long mmr_write:1; /* WP */
+ unsigned long mmr_read:1; /* WP */
+ unsigned long mmr_op_done:1; /* RW */
+ unsigned long rsvd_33_47:15;
+ unsigned long rsvd_48:1;
+ unsigned long rsvd_49_51:3;
+ unsigned long rsvd_52:1;
+ unsigned long rsvd_53_63:11;
+ } sx;
struct uv2h_gr1_tlb_mmr_control_s {
unsigned long index:12; /* RW */
unsigned long mem_sel:2; /* RW */
@@ -932,6 +1148,24 @@ union uvh_gr1_tlb_mmr_control_u {
unsigned long mmr_inj_tlbram:1; /* RW */
unsigned long rsvd_53_63:11;
} s2;
+ struct uv3h_gr1_tlb_mmr_control_s {
+ unsigned long index:12; /* RW */
+ unsigned long mem_sel:2; /* RW */
+ unsigned long rsvd_14_15:2;
+ unsigned long auto_valid_en:1; /* RW */
+ unsigned long rsvd_17_19:3;
+ unsigned long mmr_hash_index_en:1; /* RW */
+ unsigned long ecc_sel:1; /* RW */
+ unsigned long rsvd_22_29:8;
+ unsigned long mmr_write:1; /* WP */
+ unsigned long mmr_read:1; /* WP */
+ unsigned long mmr_op_done:1; /* RW */
+ unsigned long rsvd_33_47:15;
+ unsigned long undef_48:1; /* Undefined */
+ unsigned long rsvd_49_51:3;
+ unsigned long undef_52:1; /* Undefined */
+ unsigned long rsvd_53_63:11;
+ } s3;
};
/* ========================================================================= */
@@ -939,9 +1173,11 @@ union uvh_gr1_tlb_mmr_control_u {
/* ========================================================================= */
#define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL
#define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI (is_uv1_hub() ? \
- UV1H_GR1_TLB_MMR_READ_DATA_HI : \
- UV2H_GR1_TLB_MMR_READ_DATA_HI)
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
+#define UVH_GR1_TLB_MMR_READ_DATA_HI \
+ (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI : \
+ (is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI : \
+ UV3H_GR1_TLB_MMR_READ_DATA_HI))
#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
#define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
@@ -952,6 +1188,46 @@ union uvh_gr1_tlb_mmr_control_u {
#define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
#define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
+#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
+#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
+#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
+#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
+#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
+#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
+#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
+#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
+
+#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
+#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
+#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
+#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
+#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
+#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
+#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
+#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
+
+#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
+#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
+#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
+#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
+#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
+#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
+#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
+#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
+
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
+#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
+
union uvh_gr1_tlb_mmr_read_data_hi_u {
unsigned long v;
struct uvh_gr1_tlb_mmr_read_data_hi_s {
@@ -961,6 +1237,36 @@ union uvh_gr1_tlb_mmr_read_data_hi_u {
unsigned long larger:1; /* RO */
unsigned long rsvd_45_63:19;
} s;
+ struct uv1h_gr1_tlb_mmr_read_data_hi_s {
+ unsigned long pfn:41; /* RO */
+ unsigned long gaa:2; /* RO */
+ unsigned long dirty:1; /* RO */
+ unsigned long larger:1; /* RO */
+ unsigned long rsvd_45_63:19;
+ } s1;
+ struct uvxh_gr1_tlb_mmr_read_data_hi_s {
+ unsigned long pfn:41; /* RO */
+ unsigned long gaa:2; /* RO */
+ unsigned long dirty:1; /* RO */
+ unsigned long larger:1; /* RO */
+ unsigned long rsvd_45_63:19;
+ } sx;
+ struct uv2h_gr1_tlb_mmr_read_data_hi_s {
+ unsigned long pfn:41; /* RO */
+ unsigned long gaa:2; /* RO */
+ unsigned long dirty:1; /* RO */
+ unsigned long larger:1; /* RO */
+ unsigned long rsvd_45_63:19;
+ } s2;
+ struct uv3h_gr1_tlb_mmr_read_data_hi_s {
+ unsigned long pfn:41; /* RO */
+ unsigned long gaa:2; /* RO */
+ unsigned long dirty:1; /* RO */
+ unsigned long larger:1; /* RO */
+ unsigned long aa_ext:1; /* RO */
+ unsigned long undef_46_54:9; /* Undefined */
+ unsigned long way_ecc:9; /* RO */
+ } s3;
};
/* ========================================================================= */
@@ -968,9 +1274,11 @@ union uvh_gr1_tlb_mmr_read_data_hi_u {
/* ========================================================================= */
#define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL
#define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO (is_uv1_hub() ? \
- UV1H_GR1_TLB_MMR_READ_DATA_LO : \
- UV2H_GR1_TLB_MMR_READ_DATA_LO)
+#define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
+#define UVH_GR1_TLB_MMR_READ_DATA_LO \
+ (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO : \
+ (is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \
+ UV3H_GR1_TLB_MMR_READ_DATA_LO))
#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
@@ -979,6 +1287,34 @@ union uvh_gr1_tlb_mmr_read_data_hi_u {
#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
+#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
+#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
+#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
+#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
+#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+
+#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
+#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
+#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
+#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
+#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
+#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+
+#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
+#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
+#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
+#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
+#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
+#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+
+#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
+#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
+#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
+#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
+#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
+#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+
union uvh_gr1_tlb_mmr_read_data_lo_u {
unsigned long v;
struct uvh_gr1_tlb_mmr_read_data_lo_s {
@@ -986,12 +1322,32 @@ union uvh_gr1_tlb_mmr_read_data_lo_u {
unsigned long asid:24; /* RO */
unsigned long valid:1; /* RO */
} s;
+ struct uv1h_gr1_tlb_mmr_read_data_lo_s {
+ unsigned long vpn:39; /* RO */
+ unsigned long asid:24; /* RO */
+ unsigned long valid:1; /* RO */
+ } s1;
+ struct uvxh_gr1_tlb_mmr_read_data_lo_s {
+ unsigned long vpn:39; /* RO */
+ unsigned long asid:24; /* RO */
+ unsigned long valid:1; /* RO */
+ } sx;
+ struct uv2h_gr1_tlb_mmr_read_data_lo_s {
+ unsigned long vpn:39; /* RO */
+ unsigned long asid:24; /* RO */
+ unsigned long valid:1; /* RO */
+ } s2;
+ struct uv3h_gr1_tlb_mmr_read_data_lo_s {
+ unsigned long vpn:39; /* RO */
+ unsigned long asid:24; /* RO */
+ unsigned long valid:1; /* RO */
+ } s3;
};
/* ========================================================================= */
/* UVH_INT_CMPB */
/* ========================================================================= */
-#define UVH_INT_CMPB 0x22080UL
+#define UVH_INT_CMPB 0x22080UL
#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0
#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL
@@ -1007,10 +1363,13 @@ union uvh_int_cmpb_u {
/* ========================================================================= */
/* UVH_INT_CMPC */
/* ========================================================================= */
-#define UVH_INT_CMPC 0x22100UL
+#define UVH_INT_CMPC 0x22100UL
+
+#define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0
+#define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL
-#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0
-#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0xffffffffffffffUL
+#define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT 0
+#define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL
union uvh_int_cmpc_u {
unsigned long v;
@@ -1023,10 +1382,13 @@ union uvh_int_cmpc_u {
/* ========================================================================= */
/* UVH_INT_CMPD */
/* ========================================================================= */
-#define UVH_INT_CMPD 0x22180UL
+#define UVH_INT_CMPD 0x22180UL
-#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0
-#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0xffffffffffffffUL
+#define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0
+#define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL
+
+#define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT 0
+#define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL
union uvh_int_cmpd_u {
unsigned long v;
@@ -1039,8 +1401,8 @@ union uvh_int_cmpd_u {
/* ========================================================================= */
/* UVH_IPI_INT */
/* ========================================================================= */
-#define UVH_IPI_INT 0x60500UL
-#define UVH_IPI_INT_32 0x348
+#define UVH_IPI_INT 0x60500UL
+#define UVH_IPI_INT_32 0x348
#define UVH_IPI_INT_VECTOR_SHFT 0
#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8
@@ -1069,8 +1431,8 @@ union uvh_ipi_int_u {
/* ========================================================================= */
/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */
/* ========================================================================= */
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
@@ -1091,8 +1453,8 @@ union uvh_lb_bau_intd_payload_queue_first_u {
/* ========================================================================= */
/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */
/* ========================================================================= */
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
@@ -1109,8 +1471,8 @@ union uvh_lb_bau_intd_payload_queue_last_u {
/* ========================================================================= */
/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */
/* ========================================================================= */
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
@@ -1127,8 +1489,8 @@ union uvh_lb_bau_intd_payload_queue_tail_u {
/* ========================================================================= */
/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */
/* ========================================================================= */
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68
#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
@@ -1189,14 +1551,21 @@ union uvh_lb_bau_intd_software_acknowledge_u {
/* ========================================================================= */
/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */
/* ========================================================================= */
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70
+
/* ========================================================================= */
/* UVH_LB_BAU_MISC_CONTROL */
/* ========================================================================= */
-#define UVH_LB_BAU_MISC_CONTROL 0x320170UL
-#define UVH_LB_BAU_MISC_CONTROL_32 0xa10
+#define UVH_LB_BAU_MISC_CONTROL 0x320170UL
+#define UV1H_LB_BAU_MISC_CONTROL 0x320170UL
+#define UV2H_LB_BAU_MISC_CONTROL 0x320170UL
+#define UV3H_LB_BAU_MISC_CONTROL 0x320170UL
+#define UVH_LB_BAU_MISC_CONTROL_32 0xa10
+#define UV1H_LB_BAU_MISC_CONTROL_32 0x320170UL
+#define UV2H_LB_BAU_MISC_CONTROL_32 0x320170UL
+#define UV3H_LB_BAU_MISC_CONTROL_32 0x320170UL
#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
@@ -1213,6 +1582,7 @@ union uvh_lb_bau_intd_software_acknowledge_u {
#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
+#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48
#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
@@ -1228,6 +1598,7 @@ union uvh_lb_bau_intd_software_acknowledge_u {
#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
+#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
@@ -1262,6 +1633,53 @@ union uvh_lb_bau_intd_software_acknowledge_u {
#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
#define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
+#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
+#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
+#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
+#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
+#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
+#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
+#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
+#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
+#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
+#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
+#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
+#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
+#define UVXH_LB_BAU_MISC_CONTROL_FUN_SHFT 48
+#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
+#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
+#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
+#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
+#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
+#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
+#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
+#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
+#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
+#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
+#define UVXH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
+
#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
@@ -1309,6 +1727,59 @@ union uvh_lb_bau_intd_software_acknowledge_u {
#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
+#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
+#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
+#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
+#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
+#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
+#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
+#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
+#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
+#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
+#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
+#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
+#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT 37
+#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38
+#define UV3H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
+#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
+#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
+#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
+#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
+#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
+#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
+#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
+#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
+#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
+#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_MASK 0x0000002000000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
+#define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
+
union uvh_lb_bau_misc_control_u {
unsigned long v;
struct uvh_lb_bau_misc_control_s {
@@ -1327,7 +1798,8 @@ union uvh_lb_bau_misc_control_u {
unsigned long programmed_initial_priority:3; /* RW */
unsigned long use_incoming_priority:1; /* RW */
unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long rsvd_29_63:35;
+ unsigned long rsvd_29_47:19;
+ unsigned long fun:16; /* RW */
} s;
struct uv1h_lb_bau_misc_control_s {
unsigned long rejection_delay:8; /* RW */
@@ -1348,6 +1820,32 @@ union uvh_lb_bau_misc_control_u {
unsigned long rsvd_29_47:19;
unsigned long fun:16; /* RW */
} s1;
+ struct uvxh_lb_bau_misc_control_s {
+ unsigned long rejection_delay:8; /* RW */
+ unsigned long apic_mode:1; /* RW */
+ unsigned long force_broadcast:1; /* RW */
+ unsigned long force_lock_nop:1; /* RW */
+ unsigned long qpi_agent_presence_vector:3; /* RW */
+ unsigned long descriptor_fetch_mode:1; /* RW */
+ unsigned long enable_intd_soft_ack_mode:1; /* RW */
+ unsigned long intd_soft_ack_timeout_period:4; /* RW */
+ unsigned long enable_dual_mapping_mode:1; /* RW */
+ unsigned long vga_io_port_decode_enable:1; /* RW */
+ unsigned long vga_io_port_16_bit_decode:1; /* RW */
+ unsigned long suppress_dest_registration:1; /* RW */
+ unsigned long programmed_initial_priority:3; /* RW */
+ unsigned long use_incoming_priority:1; /* RW */
+ unsigned long enable_programmed_initial_priority:1;/* RW */
+ unsigned long enable_automatic_apic_mode_selection:1;/* RW */
+ unsigned long apic_mode_status:1; /* RO */
+ unsigned long suppress_interrupts_to_self:1; /* RW */
+ unsigned long enable_lock_based_system_flush:1;/* RW */
+ unsigned long enable_extended_sb_status:1; /* RW */
+ unsigned long suppress_int_prio_udt_to_self:1;/* RW */
+ unsigned long use_legacy_descriptor_formats:1;/* RW */
+ unsigned long rsvd_36_47:12;
+ unsigned long fun:16; /* RW */
+ } sx;
struct uv2h_lb_bau_misc_control_s {
unsigned long rejection_delay:8; /* RW */
unsigned long apic_mode:1; /* RW */
@@ -1374,13 +1872,42 @@ union uvh_lb_bau_misc_control_u {
unsigned long rsvd_36_47:12;
unsigned long fun:16; /* RW */
} s2;
+ struct uv3h_lb_bau_misc_control_s {
+ unsigned long rejection_delay:8; /* RW */
+ unsigned long apic_mode:1; /* RW */
+ unsigned long force_broadcast:1; /* RW */
+ unsigned long force_lock_nop:1; /* RW */
+ unsigned long qpi_agent_presence_vector:3; /* RW */
+ unsigned long descriptor_fetch_mode:1; /* RW */
+ unsigned long enable_intd_soft_ack_mode:1; /* RW */
+ unsigned long intd_soft_ack_timeout_period:4; /* RW */
+ unsigned long enable_dual_mapping_mode:1; /* RW */
+ unsigned long vga_io_port_decode_enable:1; /* RW */
+ unsigned long vga_io_port_16_bit_decode:1; /* RW */
+ unsigned long suppress_dest_registration:1; /* RW */
+ unsigned long programmed_initial_priority:3; /* RW */
+ unsigned long use_incoming_priority:1; /* RW */
+ unsigned long enable_programmed_initial_priority:1;/* RW */
+ unsigned long enable_automatic_apic_mode_selection:1;/* RW */
+ unsigned long apic_mode_status:1; /* RO */
+ unsigned long suppress_interrupts_to_self:1; /* RW */
+ unsigned long enable_lock_based_system_flush:1;/* RW */
+ unsigned long enable_extended_sb_status:1; /* RW */
+ unsigned long suppress_int_prio_udt_to_self:1;/* RW */
+ unsigned long use_legacy_descriptor_formats:1;/* RW */
+ unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */
+ unsigned long enable_intd_prefetch_hint:1; /* RW */
+ unsigned long thread_kill_timebase:8; /* RW */
+ unsigned long rsvd_46_47:2;
+ unsigned long fun:16; /* RW */
+ } s3;
};
/* ========================================================================= */
/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */
/* ========================================================================= */
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
+#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
+#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62
@@ -1402,8 +1929,8 @@ union uvh_lb_bau_sb_activation_control_u {
/* ========================================================================= */
/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */
/* ========================================================================= */
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL
@@ -1418,8 +1945,8 @@ union uvh_lb_bau_sb_activation_status_0_u {
/* ========================================================================= */
/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */
/* ========================================================================= */
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL
@@ -1434,8 +1961,8 @@ union uvh_lb_bau_sb_activation_status_1_u {
/* ========================================================================= */
/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */
/* ========================================================================= */
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
+#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
+#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12
#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
@@ -1456,7 +1983,10 @@ union uvh_lb_bau_sb_descriptor_base_u {
/* ========================================================================= */
/* UVH_NODE_ID */
/* ========================================================================= */
-#define UVH_NODE_ID 0x0UL
+#define UVH_NODE_ID 0x0UL
+#define UV1H_NODE_ID 0x0UL
+#define UV2H_NODE_ID 0x0UL
+#define UV3H_NODE_ID 0x0UL
#define UVH_NODE_ID_FORCE1_SHFT 0
#define UVH_NODE_ID_MANUFACTURER_SHFT 1
@@ -1484,6 +2014,21 @@ union uvh_lb_bau_sb_descriptor_base_u {
#define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL
#define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL
+#define UVXH_NODE_ID_FORCE1_SHFT 0
+#define UVXH_NODE_ID_MANUFACTURER_SHFT 1
+#define UVXH_NODE_ID_PART_NUMBER_SHFT 12
+#define UVXH_NODE_ID_REVISION_SHFT 28
+#define UVXH_NODE_ID_NODE_ID_SHFT 32
+#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50
+#define UVXH_NODE_ID_NI_PORT_SHFT 57
+#define UVXH_NODE_ID_FORCE1_MASK 0x0000000000000001UL
+#define UVXH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
+#define UVXH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
+#define UVXH_NODE_ID_REVISION_MASK 0x00000000f0000000UL
+#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
+#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
+
#define UV2H_NODE_ID_FORCE1_SHFT 0
#define UV2H_NODE_ID_MANUFACTURER_SHFT 1
#define UV2H_NODE_ID_PART_NUMBER_SHFT 12
@@ -1499,6 +2044,25 @@ union uvh_lb_bau_sb_descriptor_base_u {
#define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
#define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
+#define UV3H_NODE_ID_FORCE1_SHFT 0
+#define UV3H_NODE_ID_MANUFACTURER_SHFT 1
+#define UV3H_NODE_ID_PART_NUMBER_SHFT 12
+#define UV3H_NODE_ID_REVISION_SHFT 28
+#define UV3H_NODE_ID_NODE_ID_SHFT 32
+#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48
+#define UV3H_NODE_ID_RESERVED_2_SHFT 49
+#define UV3H_NODE_ID_NODES_PER_BIT_SHFT 50
+#define UV3H_NODE_ID_NI_PORT_SHFT 57
+#define UV3H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
+#define UV3H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
+#define UV3H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
+#define UV3H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
+#define UV3H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL
+#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL
+#define UV3H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
+#define UV3H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
+
union uvh_node_id_u {
unsigned long v;
struct uvh_node_id_s {
@@ -1521,6 +2085,17 @@ union uvh_node_id_u {
unsigned long ni_port:4; /* RO */
unsigned long rsvd_60_63:4;
} s1;
+ struct uvxh_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long node_id:15; /* RW */
+ unsigned long rsvd_47_49:3;
+ unsigned long nodes_per_bit:7; /* RO */
+ unsigned long ni_port:5; /* RO */
+ unsigned long rsvd_62_63:2;
+ } sx;
struct uv2h_node_id_s {
unsigned long force1:1; /* RO */
unsigned long manufacturer:11; /* RO */
@@ -1532,13 +2107,26 @@ union uvh_node_id_u {
unsigned long ni_port:5; /* RO */
unsigned long rsvd_62_63:2;
} s2;
+ struct uv3h_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long node_id:15; /* RW */
+ unsigned long rsvd_47:1;
+ unsigned long router_select:1; /* RO */
+ unsigned long rsvd_49:1;
+ unsigned long nodes_per_bit:7; /* RO */
+ unsigned long ni_port:5; /* RO */
+ unsigned long rsvd_62_63:2;
+ } s3;
};
/* ========================================================================= */
/* UVH_NODE_PRESENT_TABLE */
/* ========================================================================= */
-#define UVH_NODE_PRESENT_TABLE 0x1400UL
-#define UVH_NODE_PRESENT_TABLE_DEPTH 16
+#define UVH_NODE_PRESENT_TABLE 0x1400UL
+#define UVH_NODE_PRESENT_TABLE_DEPTH 16
#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0
#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
@@ -1553,7 +2141,7 @@ union uvh_node_present_table_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
@@ -1577,7 +2165,7 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
@@ -1601,7 +2189,7 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
@@ -1625,7 +2213,7 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
@@ -1642,7 +2230,7 @@ union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
@@ -1659,7 +2247,7 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
@@ -1676,7 +2264,10 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_CONFIG_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL
+#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL
+#define UV1H_RH_GAM_CONFIG_MMR 0x1600000UL
+#define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL
+#define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL
#define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
@@ -1690,11 +2281,21 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL
+#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
+#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
+#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
+#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
+
#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
+#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
+#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
+#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
+#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
+
union uvh_rh_gam_config_mmr_u {
unsigned long v;
struct uvh_rh_gam_config_mmr_s {
@@ -1709,20 +2310,37 @@ union uvh_rh_gam_config_mmr_u {
unsigned long mmiol_cfg:1; /* RW */
unsigned long rsvd_13_63:51;
} s1;
+ struct uvxh_rh_gam_config_mmr_s {
+ unsigned long m_skt:6; /* RW */
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
+ } sx;
struct uv2h_rh_gam_config_mmr_s {
unsigned long m_skt:6; /* RW */
unsigned long n_skt:4; /* RW */
unsigned long rsvd_10_63:54;
} s2;
+ struct uv3h_rh_gam_config_mmr_s {
+ unsigned long m_skt:6; /* RW */
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
+ } s3;
};
/* ========================================================================= */
/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
+#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48
@@ -1733,6 +2351,13 @@ union uvh_rh_gam_config_mmr_u {
#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
@@ -1740,12 +2365,23 @@ union uvh_rh_gam_config_mmr_u {
#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_SHFT 62
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK 0x4000000000000000UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
union uvh_rh_gam_gru_overlay_config_mmr_u {
unsigned long v;
struct uvh_rh_gam_gru_overlay_config_mmr_s {
unsigned long rsvd_0_27:28;
unsigned long base:18; /* RW */
- unsigned long rsvd_46_62:17;
+ unsigned long rsvd_46_51:6;
+ unsigned long n_gru:4; /* RW */
+ unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
} s;
struct uv1h_rh_gam_gru_overlay_config_mmr_s {
@@ -1758,6 +2394,14 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
} s1;
+ struct uvxh_rh_gam_gru_overlay_config_mmr_s {
+ unsigned long rsvd_0_27:28;
+ unsigned long base:18; /* RW */
+ unsigned long rsvd_46_51:6;
+ unsigned long n_gru:4; /* RW */
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } sx;
struct uv2h_rh_gam_gru_overlay_config_mmr_s {
unsigned long rsvd_0_27:28;
unsigned long base:18; /* RW */
@@ -1766,12 +2410,22 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
} s2;
+ struct uv3h_rh_gam_gru_overlay_config_mmr_s {
+ unsigned long rsvd_0_27:28;
+ unsigned long base:18; /* RW */
+ unsigned long rsvd_46_51:6;
+ unsigned long n_gru:4; /* RW */
+ unsigned long rsvd_56_61:6;
+ unsigned long mode:1; /* RW */
+ unsigned long enable:1; /* RW */
+ } s3;
};
/* ========================================================================= */
/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
+#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
@@ -1814,10 +2468,15 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u {
/* ========================================================================= */
/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
+#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
+#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
+#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46
@@ -1826,11 +2485,21 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u {
#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL
#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
union uvh_rh_gam_mmr_overlay_config_mmr_u {
unsigned long v;
struct uvh_rh_gam_mmr_overlay_config_mmr_s {
@@ -1846,18 +2515,30 @@ union uvh_rh_gam_mmr_overlay_config_mmr_u {
unsigned long rsvd_47_62:16;
unsigned long enable:1; /* RW */
} s1;
+ struct uvxh_rh_gam_mmr_overlay_config_mmr_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long rsvd_46_62:17;
+ unsigned long enable:1; /* RW */
+ } sx;
struct uv2h_rh_gam_mmr_overlay_config_mmr_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
} s2;
+ struct uv3h_rh_gam_mmr_overlay_config_mmr_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long rsvd_46_62:17;
+ unsigned long enable:1; /* RW */
+ } s3;
};
/* ========================================================================= */
/* UVH_RTC */
/* ========================================================================= */
-#define UVH_RTC 0x340000UL
+#define UVH_RTC 0x340000UL
#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0
#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL
@@ -1873,7 +2554,7 @@ union uvh_rtc_u {
/* ========================================================================= */
/* UVH_RTC1_INT_CONFIG */
/* ========================================================================= */
-#define UVH_RTC1_INT_CONFIG 0x615c0UL
+#define UVH_RTC1_INT_CONFIG 0x615c0UL
#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0
#define UVH_RTC1_INT_CONFIG_DM_SHFT 8
@@ -1911,8 +2592,8 @@ union uvh_rtc1_int_config_u {
/* ========================================================================= */
/* UVH_SCRATCH5 */
/* ========================================================================= */
-#define UVH_SCRATCH5 0x2d0200UL
-#define UVH_SCRATCH5_32 0x778
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x778
#define UVH_SCRATCH5_SCRATCH5_SHFT 0
#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
@@ -1925,79 +2606,79 @@ union uvh_scratch5_u {
};
/* ========================================================================= */
-/* UV2H_EVENT_OCCURRED2 */
-/* ========================================================================= */
-#define UV2H_EVENT_OCCURRED2 0x70100UL
-#define UV2H_EVENT_OCCURRED2_32 0xb68
-
-#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0
-#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1
-#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2
-#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3
-#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4
-#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5
-#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6
-#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7
-#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8
-#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9
-#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10
-#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11
-#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12
-#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13
-#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14
-#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15
-#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16
-#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17
-#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18
-#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19
-#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20
-#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21
-#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22
-#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23
-#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24
-#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25
-#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26
-#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27
-#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28
-#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29
-#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30
-#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31
-#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
-#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
-#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
-#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
-#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
-#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
-#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
-#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
-#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
-#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
-#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
-#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
-#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
-#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
-#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
-#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
-#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
-#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
-#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
-#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
-#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
-#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
-#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
-#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
-#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
-
-union uv2h_event_occurred2_u {
+/* UVXH_EVENT_OCCURRED2 */
+/* ========================================================================= */
+#define UVXH_EVENT_OCCURRED2 0x70100UL
+#define UVXH_EVENT_OCCURRED2_32 0xb68
+
+#define UVXH_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UVXH_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UVXH_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UVXH_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UVXH_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UVXH_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UVXH_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UVXH_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UVXH_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UVXH_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UVXH_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UVXH_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UVXH_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UVXH_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UVXH_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UVXH_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UVXH_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UVXH_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UVXH_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UVXH_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UVXH_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UVXH_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UVXH_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UVXH_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UVXH_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UVXH_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UVXH_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UVXH_EVENT_OCCURRED2_RTC_27_SHFT 27
+#define UVXH_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UVXH_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UVXH_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UVXH_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UVXH_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UVXH_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UVXH_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UVXH_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UVXH_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UVXH_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UVXH_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UVXH_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UVXH_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UVXH_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UVXH_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UVXH_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UVXH_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UVXH_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UVXH_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UVXH_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UVXH_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UVXH_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UVXH_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UVXH_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UVXH_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UVXH_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UVXH_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UVXH_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UVXH_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UVXH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+union uvxh_event_occurred2_u {
unsigned long v;
- struct uv2h_event_occurred2_s {
+ struct uvxh_event_occurred2_s {
unsigned long rtc_0:1; /* RW */
unsigned long rtc_1:1; /* RW */
unsigned long rtc_2:1; /* RW */
@@ -2031,29 +2712,46 @@ union uv2h_event_occurred2_u {
unsigned long rtc_30:1; /* RW */
unsigned long rtc_31:1; /* RW */
unsigned long rsvd_32_63:32;
- } s1;
+ } sx;
};
/* ========================================================================= */
-/* UV2H_EVENT_OCCURRED2_ALIAS */
+/* UVXH_EVENT_OCCURRED2_ALIAS */
/* ========================================================================= */
-#define UV2H_EVENT_OCCURRED2_ALIAS 0x70108UL
-#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70
+#define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL
+#define UVXH_EVENT_OCCURRED2_ALIAS_32 0xb70
+
/* ========================================================================= */
-/* UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 */
+/* UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 */
/* ========================================================================= */
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
+#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
+#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL
+
+#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
+#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
-union uv2h_lb_bau_sb_activation_status_2_u {
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+
+union uvxh_lb_bau_sb_activation_status_2_u {
unsigned long v;
+ struct uvxh_lb_bau_sb_activation_status_2_s {
+ unsigned long aux_error:64; /* RW */
+ } sx;
struct uv2h_lb_bau_sb_activation_status_2_s {
unsigned long aux_error:64; /* RW */
- } s1;
+ } s2;
+ struct uv3h_lb_bau_sb_activation_status_2_s {
+ unsigned long aux_error:64; /* RW */
+ } s3;
};
/* ========================================================================= */
@@ -2073,5 +2771,87 @@ union uv1h_lb_target_physical_apic_id_mask_u {
} s1;
};
+/* ========================================================================= */
+/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */
+/* ========================================================================= */
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL
+
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uv3h_rh_gam_mmioh_overlay_config0_mmr_u {
+ unsigned long v;
+ struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s3;
+};
+
+/* ========================================================================= */
+/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */
+/* ========================================================================= */
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1604000UL
+
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uv3h_rh_gam_mmioh_overlay_config1_mmr_u {
+ unsigned long v;
+ struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s3;
+};
+
+/* ========================================================================= */
+/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */
+/* ========================================================================= */
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
+
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
+
+union uv3h_rh_gam_mmioh_redirect_config0_mmr_u {
+ unsigned long v;
+ struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s3;
+};
+
+/* ========================================================================= */
+/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */
+/* ========================================================================= */
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
+
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
+
+union uv3h_rh_gam_mmioh_redirect_config1_mmr_u {
+ unsigned long v;
+ struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s3;
+};
+
#endif /* _ASM_X86_UV_UV_MMRS_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 235b49fa554..b6fbf860e39 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -57,9 +57,12 @@
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
#define SECONDARY_EXEC_RDTSCP 0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
@@ -97,6 +100,7 @@ enum vmcs_field {
GUEST_GS_SELECTOR = 0x0000080a,
GUEST_LDTR_SELECTOR = 0x0000080c,
GUEST_TR_SELECTOR = 0x0000080e,
+ GUEST_INTR_STATUS = 0x00000810,
HOST_ES_SELECTOR = 0x00000c00,
HOST_CS_SELECTOR = 0x00000c02,
HOST_SS_SELECTOR = 0x00000c04,
@@ -124,6 +128,14 @@ enum vmcs_field {
APIC_ACCESS_ADDR_HIGH = 0x00002015,
EPT_POINTER = 0x0000201a,
EPT_POINTER_HIGH = 0x0000201b,
+ EOI_EXIT_BITMAP0 = 0x0000201c,
+ EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
+ EOI_EXIT_BITMAP1 = 0x0000201e,
+ EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
+ EOI_EXIT_BITMAP2 = 0x00002020,
+ EOI_EXIT_BITMAP2_HIGH = 0x00002021,
+ EOI_EXIT_BITMAP3 = 0x00002022,
+ EOI_EXIT_BITMAP3_HIGH = 0x00002023,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
@@ -346,9 +358,9 @@ enum vmcs_field {
#define AR_RESERVD_MASK 0xfffe0f00
-#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0)
-#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1)
-#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2)
+#define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1)
+#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2)
#define VMX_NR_VPIDS (1 << 16)
#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 57693498519..d8d99222b36 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -69,17 +69,6 @@ struct x86_init_oem {
};
/**
- * struct x86_init_mapping - platform specific initial kernel pagetable setup
- * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage
- *
- * For more details on the purpose of this hook, look in
- * init_memory_mapping and the commit that added it.
- */
-struct x86_init_mapping {
- void (*pagetable_reserve)(u64 start, u64 end);
-};
-
-/**
* struct x86_init_paging - platform specific paging functions
* @pagetable_init: platform specific paging initialization call to setup
* the kernel pagetables and prepare accessors functions.
@@ -136,7 +125,6 @@ struct x86_init_ops {
struct x86_init_mpparse mpparse;
struct x86_init_irqs irqs;
struct x86_init_oem oem;
- struct x86_init_mapping mapping;
struct x86_init_paging paging;
struct x86_init_timers timers;
struct x86_init_iommu iommu;
@@ -181,19 +169,38 @@ struct x86_platform_ops {
};
struct pci_dev;
+struct msi_msg;
struct x86_msi_ops {
int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
+ void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq,
+ unsigned int dest, struct msi_msg *msg,
+ u8 hpet_id);
void (*teardown_msi_irq)(unsigned int irq);
void (*teardown_msi_irqs)(struct pci_dev *dev);
void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
+ int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
};
+struct IO_APIC_route_entry;
+struct io_apic_irq_attr;
+struct irq_data;
+struct cpumask;
+
struct x86_io_apic_ops {
- void (*init) (void);
- unsigned int (*read) (unsigned int apic, unsigned int reg);
- void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
- void (*modify)(unsigned int apic, unsigned int reg, unsigned int value);
+ void (*init) (void);
+ unsigned int (*read) (unsigned int apic, unsigned int reg);
+ void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
+ void (*modify) (unsigned int apic, unsigned int reg, unsigned int value);
+ void (*disable)(void);
+ void (*print_entries)(unsigned int apic, unsigned int nr_entries);
+ int (*set_affinity)(struct irq_data *data,
+ const struct cpumask *mask,
+ bool force);
+ int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry,
+ unsigned int destination, int vector,
+ struct io_apic_irq_attr *attr);
+ void (*eoi_ioapic_pin)(int apic, int pin, int vector);
};
extern struct x86_init_ops x86_init;
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index cc146d51449..ca842f2769e 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -16,4 +16,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
return raw_irqs_disabled_flags(regs->flags);
}
+/* No need for a barrier -- XCHG is a barrier on x86. */
+#define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
+
#endif /* _ASM_X86_XEN_EVENTS_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 472b9b78301..6aef9fbc09b 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -212,4 +212,6 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr);
void make_lowmem_page_readonly(void *vaddr);
void make_lowmem_page_readwrite(void *vaddr);
+#define xen_remap(cookie, size) ioremap((cookie), (size));
+
#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index f8fde90bc45..d8829751b3f 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,10 +1,499 @@
#ifdef CONFIG_KMEMCHECK
/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
# include <asm-generic/xor.h>
+#elif !defined(_ASM_X86_XOR_H)
+#define _ASM_X86_XOR_H
+
+/*
+ * Optimized RAID-5 checksumming functions for SSE.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * Cache avoiding checksumming functions utilizing KNI instructions
+ * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
+ */
+
+/*
+ * Based on
+ * High-speed RAID5 checksumming functions utilizing SSE instructions.
+ * Copyright (C) 1998 Ingo Molnar.
+ */
+
+/*
+ * x86-64 changes / gcc fixes from Andi Kleen.
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ *
+ * This hasn't been optimized for the hammer yet, but there are likely
+ * no advantages to be gotten from x86-64 here anyways.
+ */
+
+#include <asm/i387.h>
+
+#ifdef CONFIG_X86_32
+/* reduce register pressure */
+# define XOR_CONSTANT_CONSTRAINT "i"
#else
+# define XOR_CONSTANT_CONSTRAINT "re"
+#endif
+
+#define OFFS(x) "16*("#x")"
+#define PF_OFFS(x) "256+16*("#x")"
+#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
+#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
+#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
+#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
+#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
+#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
+#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
+#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
+#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
+#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
+#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
+#define NOP(x)
+
+#define BLK64(pf, op, i) \
+ pf(i) \
+ op(i, 0) \
+ op(i + 1, 1) \
+ op(i + 2, 2) \
+ op(i + 3, 3)
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF2(i) \
+ PF2(i + 2) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ XO2(i, 0) \
+ XO2(i + 1, 1) \
+ XO2(i + 2, 2) \
+ XO2(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(PF2, XO2, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF2(i) \
+ PF2(i + 2) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ PF3(i) \
+ PF3(i + 2) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO2(i, 0) \
+ XO2(i + 1, 1) \
+ XO2(i + 2, 2) \
+ XO2(i + 3, 3) \
+ XO3(i, 0) \
+ XO3(i + 1, 1) \
+ XO3(i + 2, 2) \
+ XO3(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1),
+ [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(PF2, XO2, i) \
+ BLK64(PF3, XO3, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1),
+ [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF2(i) \
+ PF2(i + 2) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ PF3(i) \
+ PF3(i + 2) \
+ XO2(i, 0) \
+ XO2(i + 1, 1) \
+ XO2(i + 2, 2) \
+ XO2(i + 3, 3) \
+ PF4(i) \
+ PF4(i + 2) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO3(i, 0) \
+ XO3(i + 1, 1) \
+ XO3(i + 2, 2) \
+ XO3(i + 3, 3) \
+ XO4(i, 0) \
+ XO4(i + 1, 1) \
+ XO4(i + 2, 2) \
+ XO4(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " add %[inc], %[p5] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
+ [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(PF2, XO2, i) \
+ BLK64(PF3, XO3, i) \
+ BLK64(PF4, XO4, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " add %[inc], %[p5] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
+ [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static struct xor_block_template xor_block_sse_pf64 = {
+ .name = "prefetch64-sse",
+ .do_2 = xor_sse_2_pf64,
+ .do_3 = xor_sse_3_pf64,
+ .do_4 = xor_sse_4_pf64,
+ .do_5 = xor_sse_5_pf64,
+};
+
+#undef LD
+#undef XO1
+#undef XO2
+#undef XO3
+#undef XO4
+#undef ST
+#undef NOP
+#undef BLK64
+#undef BLOCK
+
+#undef XOR_CONSTANT_CONSTRAINT
+
#ifdef CONFIG_X86_32
# include <asm/xor_32.h>
#else
# include <asm/xor_64.h>
#endif
-#endif
+
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+ AVX_SELECT(FASTEST)
+
+#endif /* _ASM_X86_XOR_H */
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index f79cb7ec0e0..ce05722e3c6 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -2,7 +2,7 @@
#define _ASM_X86_XOR_32_H
/*
- * Optimized RAID-5 checksumming functions for MMX and SSE.
+ * Optimized RAID-5 checksumming functions for MMX.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -529,290 +529,6 @@ static struct xor_block_template xor_block_p5_mmx = {
.do_5 = xor_p5_mmx_5,
};
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-#define OFFS(x) "16*("#x")"
-#define PF_OFFS(x) "256+16*("#x")"
-#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
-#define LD(x, y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
-#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
-#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n"
-#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n"
-#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n"
-#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n"
-#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n"
-#define XO1(x, y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
-#define XO2(x, y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
-#define XO3(x, y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
-#define XO4(x, y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
-#define XO5(x, y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
-
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- LD(i, 0) \
- LD(i + 1, 1) \
- PF1(i) \
- PF1(i + 2) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i,0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i,0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- XO2(i,0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- ST(i,0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r"(p2), "+r"(p3)
- :
- : "memory" );
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i,0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i,0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO2(i,0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- XO3(i,0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- ST(i,0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " addl $256, %4 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
- :
- : "memory" );
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- /* Make sure GCC forgets anything it knows about p4 or p5,
- such that it won't pass to the asm volatile below a
- register that is shared with any other variable. That's
- because we modify p4 and p5 there, but we can't mark them
- as read/write, otherwise we'd overflow the 10-asm-operands
- limit of GCC < 3.1. */
- asm("" : "+r" (p4), "+r" (p5));
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i,0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i,0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- XO2(i,0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- PF4(i) \
- PF4(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO3(i,0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- XO4(i,0) \
- XO4(i + 1, 1) \
- XO4(i + 2, 2) \
- XO4(i + 3, 3) \
- ST(i,0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " addl $256, %4 ;\n"
- " addl $256, %5 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- : "r" (p4), "r" (p5)
- : "memory");
-
- /* p4 and p5 were modified, and now the variables are dead.
- Clobber them just to be sure nobody does something stupid
- like assuming they have some legal value. */
- asm("" : "=r" (p4), "=r" (p5));
-
- kernel_fpu_end();
-}
-
static struct xor_block_template xor_block_pIII_sse = {
.name = "pIII_sse",
.do_2 = xor_sse_2,
@@ -827,26 +543,25 @@ static struct xor_block_template xor_block_pIII_sse = {
/* Also try the generic routines. */
#include <asm-generic/xor.h>
+/* We force the use of the SSE xor block because it can write around L2.
+ We may also be able to load into the L1 only depending on how the cpu
+ deals with a load to a line that is being prefetched. */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
- xor_speed(&xor_block_8regs); \
- xor_speed(&xor_block_8regs_p); \
- xor_speed(&xor_block_32regs); \
- xor_speed(&xor_block_32regs_p); \
AVX_XOR_SPEED; \
- if (cpu_has_xmm) \
+ if (cpu_has_xmm) { \
xor_speed(&xor_block_pIII_sse); \
- if (cpu_has_mmx) { \
+ xor_speed(&xor_block_sse_pf64); \
+ } else if (cpu_has_mmx) { \
xor_speed(&xor_block_pII_mmx); \
xor_speed(&xor_block_p5_mmx); \
+ } else { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_8regs_p); \
+ xor_speed(&xor_block_32regs); \
+ xor_speed(&xor_block_32regs_p); \
} \
} while (0)
-/* We force the use of the SSE xor block because it can write around L2.
- We may also be able to load into the L1 only depending on how the cpu
- deals with a load to a line that is being prefetched. */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
- AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
-
#endif /* _ASM_X86_XOR_32_H */
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 87ac522c4af..546f1e3b87c 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -1,301 +1,6 @@
#ifndef _ASM_X86_XOR_64_H
#define _ASM_X86_XOR_64_H
-/*
- * Optimized RAID-5 checksumming functions for MMX and SSE.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-/*
- * Based on
- * High-speed RAID5 checksumming functions utilizing SSE instructions.
- * Copyright (C) 1998 Ingo Molnar.
- */
-
-/*
- * x86-64 changes / gcc fixes from Andi Kleen.
- * Copyright 2002 Andi Kleen, SuSE Labs.
- *
- * This hasn't been optimized for the hammer yet, but there are likely
- * no advantages to be gotten from x86-64 here anyways.
- */
-
-#include <asm/i387.h>
-
-#define OFFS(x) "16*("#x")"
-#define PF_OFFS(x) "256+16*("#x")"
-#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
-#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
-#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
-#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
-#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
-#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
-#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
-#define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n"
-#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
-#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
-#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
-#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
-#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
-
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- LD(i, 0) \
- LD(i + 1, 1) \
- PF1(i) \
- PF1(i + 2) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
- : [inc] "r" (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " addq %[inc], %[p3] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [cnt] "+r" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
- : [inc] "r" (256UL)
- : "memory");
- kernel_fpu_end();
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- XO3(i, 0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " addq %[inc], %[p3] ;\n"
- " addq %[inc], %[p4] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [cnt] "+c" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
- : [inc] "r" (256UL)
- : "memory" );
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- PF4(i) \
- PF4(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO3(i, 0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- XO4(i, 0) \
- XO4(i + 1, 1) \
- XO4(i + 2, 2) \
- XO4(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " addq %[inc], %[p3] ;\n"
- " addq %[inc], %[p4] ;\n"
- " addq %[inc], %[p5] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [cnt] "+c" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
- [p5] "+r" (p5)
- : [inc] "r" (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
static struct xor_block_template xor_block_sse = {
.name = "generic_sse",
.do_2 = xor_sse_2,
@@ -308,17 +13,15 @@ static struct xor_block_template xor_block_sse = {
/* Also try the AVX routines */
#include <asm/xor_avx.h>
+/* We force the use of the SSE xor block because it can write around L2.
+ We may also be able to load into the L1 only depending on how the cpu
+ deals with a load to a line that is being prefetched. */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
AVX_XOR_SPEED; \
+ xor_speed(&xor_block_sse_pf64); \
xor_speed(&xor_block_sse); \
} while (0)
-/* We force the use of the SSE xor block because it can write around L2.
- We may also be able to load into the L1 only depending on how the cpu
- deals with a load to a line that is being prefetched. */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
- AVX_SELECT(&xor_block_sse)
-
#endif /* _ASM_X86_XOR_64_H */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 92862cd9020..c15ddaf9071 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -1,6 +1,31 @@
#ifndef _ASM_X86_BOOTPARAM_H
#define _ASM_X86_BOOTPARAM_H
+/* setup_data types */
+#define SETUP_NONE 0
+#define SETUP_E820_EXT 1
+#define SETUP_DTB 2
+#define SETUP_PCI 3
+
+/* ram_size flags */
+#define RAMDISK_IMAGE_START_MASK 0x07FF
+#define RAMDISK_PROMPT_FLAG 0x8000
+#define RAMDISK_LOAD_FLAG 0x4000
+
+/* loadflags */
+#define LOADED_HIGH (1<<0)
+#define QUIET_FLAG (1<<5)
+#define KEEP_SEGMENTS (1<<6)
+#define CAN_USE_HEAP (1<<7)
+
+/* xloadflags */
+#define XLF_KERNEL_64 (1<<0)
+#define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1)
+#define XLF_EFI_HANDOVER_32 (1<<2)
+#define XLF_EFI_HANDOVER_64 (1<<3)
+
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
#include <linux/screen_info.h>
#include <linux/apm_bios.h>
@@ -9,12 +34,6 @@
#include <asm/ist.h>
#include <video/edid.h>
-/* setup data types */
-#define SETUP_NONE 0
-#define SETUP_E820_EXT 1
-#define SETUP_DTB 2
-#define SETUP_PCI 3
-
/* extensible setup data list node */
struct setup_data {
__u64 next;
@@ -28,9 +47,6 @@ struct setup_header {
__u16 root_flags;
__u32 syssize;
__u16 ram_size;
-#define RAMDISK_IMAGE_START_MASK 0x07FF
-#define RAMDISK_PROMPT_FLAG 0x8000
-#define RAMDISK_LOAD_FLAG 0x4000
__u16 vid_mode;
__u16 root_dev;
__u16 boot_flag;
@@ -42,10 +58,6 @@ struct setup_header {
__u16 kernel_version;
__u8 type_of_loader;
__u8 loadflags;
-#define LOADED_HIGH (1<<0)
-#define QUIET_FLAG (1<<5)
-#define KEEP_SEGMENTS (1<<6)
-#define CAN_USE_HEAP (1<<7)
__u16 setup_move_size;
__u32 code32_start;
__u32 ramdisk_image;
@@ -58,7 +70,8 @@ struct setup_header {
__u32 initrd_addr_max;
__u32 kernel_alignment;
__u8 relocatable_kernel;
- __u8 _pad2[3];
+ __u8 min_alignment;
+ __u16 xloadflags;
__u32 cmdline_size;
__u32 hardware_subarch;
__u64 hardware_subarch_data;
@@ -106,7 +119,10 @@ struct boot_params {
__u8 hd1_info[16]; /* obsolete! */ /* 0x090 */
struct sys_desc_table sys_desc_table; /* 0x0a0 */
struct olpc_ofw_header olpc_ofw_header; /* 0x0b0 */
- __u8 _pad4[128]; /* 0x0c0 */
+ __u32 ext_ramdisk_image; /* 0x0c0 */
+ __u32 ext_ramdisk_size; /* 0x0c4 */
+ __u32 ext_cmd_line_ptr; /* 0x0c8 */
+ __u8 _pad4[116]; /* 0x0cc */
struct edid_info edid_info; /* 0x140 */
struct efi_info efi_info; /* 0x1c0 */
__u32 alt_mem_k; /* 0x1e0 */
@@ -115,7 +131,20 @@ struct boot_params {
__u8 eddbuf_entries; /* 0x1e9 */
__u8 edd_mbr_sig_buf_entries; /* 0x1ea */
__u8 kbd_status; /* 0x1eb */
- __u8 _pad6[5]; /* 0x1ec */
+ __u8 _pad5[3]; /* 0x1ec */
+ /*
+ * The sentinel is set to a nonzero value (0xff) in header.S.
+ *
+ * A bootloader is supposed to only take setup_header and put
+ * it into a clean boot_params buffer. If it turns out that
+ * it is clumsy or too generous with the buffer, it most
+ * probably will pick up the sentinel variable too. The fact
+ * that this variable then is still 0xff will let kernel
+ * know that some variables in boot_params are invalid and
+ * kernel should zero out certain portions of boot_params.
+ */
+ __u8 sentinel; /* 0x1ef */
+ __u8 _pad6[1]; /* 0x1f0 */
struct setup_header hdr; /* setup header */ /* 0x1f1 */
__u8 _pad7[0x290-0x1f1-sizeof(struct setup_header)];
__u32 edd_mbr_sig_buffer[EDD_MBR_SIG_MAX]; /* 0x290 */
@@ -134,6 +163,6 @@ enum {
X86_NR_SUBARCHS,
};
-
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_BOOTPARAM_H */
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 58c829871c3..a0eab85ce7b 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -4,66 +4,6 @@
#include <linux/types.h>
#include <asm/ioctls.h>
-/*
- * Machine Check support for x86
- */
-
-/* MCG_CAP register defines */
-#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
-#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
-#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
-#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
-#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
-#define MCG_EXT_CNT_SHIFT 16
-#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
-#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
-
-/* MCG_STATUS register defines */
-#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
-#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
-#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
-
-/* MCi_STATUS register defines */
-#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
-#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
-#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
-#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
-#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
-#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
-#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
-#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
-#define MCI_STATUS_AR (1ULL<<55) /* Action required */
-#define MCACOD 0xffff /* MCA Error Code */
-
-/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
-#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
-#define MCACOD_SCRUBMSK 0xfff0
-#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
-#define MCACOD_DATA 0x0134 /* Data Load */
-#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
-
-/* MCi_MISC register defines */
-#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
-#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
-#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
-#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
-#define MCI_MISC_ADDR_PHYS 2 /* physical address */
-#define MCI_MISC_ADDR_MEM 3 /* memory address */
-#define MCI_MISC_ADDR_GENERIC 7 /* generic */
-
-/* CTL2 register defines */
-#define MCI_CTL2_CMCI_EN (1ULL << 30)
-#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
-
-#define MCJ_CTX_MASK 3
-#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
-#define MCJ_CTX_RANDOM 0 /* inject context: random */
-#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
-#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
-#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
-#define MCJ_EXCEPTION 0x8 /* raise as exception */
-#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
-
/* Fields are zero when not available */
struct mce {
__u64 status;
@@ -87,35 +27,8 @@ struct mce {
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
};
-/*
- * This structure contains all data related to the MCE log. Also
- * carries a signature to make it easier to find from external
- * debugging tools. Each entry is only valid when its finished flag
- * is set.
- */
-
-#define MCE_LOG_LEN 32
-
-struct mce_log {
- char signature[12]; /* "MACHINECHECK" */
- unsigned len; /* = MCE_LOG_LEN */
- unsigned next;
- unsigned flags;
- unsigned recordlen; /* length of struct mce */
- struct mce entry[MCE_LOG_LEN];
-};
-
-#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
-
-#define MCE_LOG_SIGNATURE "MACHINECHECK"
-
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
#define MCE_GET_LOG_LEN _IOR('M', 2, int)
#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
-/* Software defined banks */
-#define MCE_EXTENDED_BANK 128
-#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0
-#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
-
#endif /* _UAPI_ASM_X86_MCE_H */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 433a59fb1a7..892ce40a747 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -103,6 +103,8 @@
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
+#define MSR_IA32_POWER_CTL 0x000001fc
+
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
@@ -173,6 +175,7 @@
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD64_DC_CFG 0xc0011022
+#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
@@ -194,6 +197,8 @@
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
+#define MSR_F15H_NB_PERF_CTL 0xc0010240
+#define MSR_F15H_NB_PERF_CTR 0xc0010241
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
@@ -272,6 +277,7 @@
#define MSR_IA32_PLATFORM_ID 0x00000017
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_EBC_FREQUENCY_ID 0x0000002c
+#define MSR_SMI_COUNT 0x00000034
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index aa7d6ae39e0..8264f47cf53 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -95,9 +95,9 @@ typedef unsigned long sigset_t;
#ifndef __ASSEMBLY__
-#ifdef __i386__
# ifndef __KERNEL__
/* Here we must cater to libcs that poke about in kernel headers. */
+#ifdef __i386__
struct sigaction {
union {
@@ -112,7 +112,6 @@ struct sigaction {
#define sa_handler _u._sa_handler
#define sa_sigaction _u._sa_sigaction
-# endif /* ! __KERNEL__ */
#else /* __i386__ */
struct sigaction {
@@ -122,11 +121,8 @@ struct sigaction {
sigset_t sa_mask; /* mask last for extensibility */
};
-struct k_sigaction {
- struct sigaction sa;
-};
-
#endif /* !__i386__ */
+# endif /* ! __KERNEL__ */
typedef struct sigaltstack {
void __user *ss_sp;
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 979d03bce13..2871fccfee6 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -62,10 +62,12 @@
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EOI_INDUCED 45
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_APIC_WRITE 56
#define EXIT_REASON_INVPCID 58
#define VMX_EXIT_REASONS \
@@ -103,7 +105,12 @@
{ EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
{ EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
{ EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
- { EXIT_REASON_WBINVD, "WBINVD" }
+ { EXIT_REASON_WBINVD, "WBINVD" }, \
+ { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
+ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
+ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
+ { EXIT_REASON_INVD, "INVD" }, \
+ { EXIT_REASON_INVPCID, "INVPCID" }
#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34e923a5376..7bd3bd31010 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC) += trace_clock.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
-obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
+obj-y += kprobes/
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
@@ -88,6 +87,9 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
+obj-$(CONFIG_MICROCODE_EARLY) += microcode_core_early.o
+obj-$(CONFIG_MICROCODE_INTEL_EARLY) += microcode_intel_early.o
+obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o
microcode-y := microcode_core.o
microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bacf4b0d91f..230c8ea878e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(acpi_disabled);
#ifdef CONFIG_X86_64
# include <asm/proto.h>
-# include <asm/numa_64.h>
#endif /* X86 */
#define BAD_MADT_ENTRY(entry, end) ( \
@@ -697,6 +696,10 @@ EXPORT_SYMBOL(acpi_map_lsapic);
int acpi_unmap_lsapic(int cpu)
{
+#ifdef CONFIG_ACPI_NUMA
+ set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE);
+#endif
+
per_cpu(x86_cpu_to_apicid, cpu) = -1;
set_cpu_present(cpu, false);
num_processors--;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index d5e0d717005..0532f5d6e4e 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -69,7 +69,7 @@ int acpi_suspend_lowlevel(void)
#ifndef CONFIG_64BIT
header->pmode_entry = (u32)&wakeup_pmode_return;
- header->pmode_cr3 = (u32)__pa(&initial_page_table);
+ header->pmode_cr3 = (u32)__pa_symbol(initial_page_table);
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index e66311200cb..b574b295a2f 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -768,10 +768,9 @@ int __init gart_iommu_init(void)
aper_base = info.aper_base;
end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
- if (end_pfn > max_low_pfn_mapped) {
- start_pfn = (aper_base>>PAGE_SHIFT);
+ start_pfn = PFN_DOWN(aper_base);
+ if (!pfn_range_is_mapped(start_pfn, end_pfn))
init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
- }
pr_info("PCI-DMA: using GART IOMMU.\n");
iommu_size = check_iommu_size(info.aper_base, aper_size);
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index afdc3f756de..c9876efecaf 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -240,7 +240,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
dw_apb_clockevent_pause(adev->timer);
if (system_state == SYSTEM_RUNNING) {
pr_debug("skipping APBT CPU %lu offline\n", cpu);
- } else if (adev) {
+ } else {
pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
dw_apb_clockevent_stop(adev->timer);
}
@@ -311,7 +311,6 @@ void __init apbt_time_init(void)
#ifdef CONFIG_SMP
int i;
struct sfi_timer_table_entry *p_mtmr;
- unsigned int percpu_timer;
struct apbt_dev *adev;
#endif
@@ -346,13 +345,10 @@ void __init apbt_time_init(void)
return;
}
pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
- if (num_possible_cpus() <= sfi_mtimer_num) {
- percpu_timer = 1;
+ if (num_possible_cpus() <= sfi_mtimer_num)
apbt_num_timers_used = num_possible_cpus();
- } else {
- percpu_timer = 0;
+ else
apbt_num_timers_used = 1;
- }
pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);
/* here we set up per CPU timer data structure */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b994cc84aa7..a5b4dce1b7a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1477,8 +1477,7 @@ void __init bsp_end_local_APIC_setup(void)
* Now that local APIC setup is completed for BP, configure the fault
* handling for interrupt remapping.
*/
- if (irq_remapping_enabled)
- irq_remap_enable_fault_handling();
+ irq_remap_enable_fault_handling();
}
@@ -2251,8 +2250,7 @@ static int lapic_suspend(void)
local_irq_save(flags);
disable_local_APIC();
- if (irq_remapping_enabled)
- irq_remapping_disable();
+ irq_remapping_disable();
local_irq_restore(flags);
return 0;
@@ -2268,16 +2266,15 @@ static void lapic_resume(void)
return;
local_irq_save(flags);
- if (irq_remapping_enabled) {
- /*
- * IO-APIC and PIC have their own resume routines.
- * We just mask them here to make sure the interrupt
- * subsystem is completely quiet while we enable x2apic
- * and interrupt-remapping.
- */
- mask_ioapic_entries();
- legacy_pic->mask_all();
- }
+
+ /*
+ * IO-APIC and PIC have their own resume routines.
+ * We just mask them here to make sure the interrupt
+ * subsystem is completely quiet while we enable x2apic
+ * and interrupt-remapping.
+ */
+ mask_ioapic_entries();
+ legacy_pic->mask_all();
if (x2apic_mode)
enable_x2apic();
@@ -2320,8 +2317,7 @@ static void lapic_resume(void)
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
- if (irq_remapping_enabled)
- irq_remapping_reenable(x2apic_mode);
+ irq_remapping_reenable(x2apic_mode);
local_irq_restore(flags);
}
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 9c2aa89a11c..9a9110918ca 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -28,6 +28,7 @@
#include <asm/apic.h>
#include <asm/ipi.h>
#include <asm/apic_flat_64.h>
+#include <asm/pgtable.h>
static int numachip_system __read_mostly;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b739d398bb2..9ed796ccc32 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,22 +68,6 @@
#define for_each_irq_pin(entry, head) \
for (entry = head; entry; entry = entry->next)
-#ifdef CONFIG_IRQ_REMAP
-static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
-static inline bool irq_remapped(struct irq_cfg *cfg)
-{
- return cfg->irq_2_iommu.iommu != NULL;
-}
-#else
-static inline bool irq_remapped(struct irq_cfg *cfg)
-{
- return false;
-}
-static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
-{
-}
-#endif
-
/*
* Is the SiS APIC rmw bug present ?
* -1 = don't know, 0 = no, 1 = yes
@@ -300,9 +284,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
return cfg;
}
-static int alloc_irq_from(unsigned int from, int node)
+static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
{
- return irq_alloc_desc_from(from, node);
+ return irq_alloc_descs_from(from, count, node);
}
static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
@@ -326,7 +310,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+ (mpc_ioapic_addr(idx) & ~PAGE_MASK);
}
-static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
+void io_apic_eoi(unsigned int apic, unsigned int vector)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
writel(vector, &io_apic->eoi);
@@ -573,19 +557,10 @@ static void unmask_ioapic_irq(struct irq_data *data)
* Otherwise, we simulate the EOI message manually by changing the trigger
* mode to edge and then back to level, with RTE being masked during this.
*/
-static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
+void native_eoi_ioapic_pin(int apic, int pin, int vector)
{
if (mpc_ioapic_ver(apic) >= 0x20) {
- /*
- * Intr-remapping uses pin number as the virtual vector
- * in the RTE. Actual vector is programmed in
- * intr-remapping table entry. Hence for the io-apic
- * EOI we use the pin number.
- */
- if (cfg && irq_remapped(cfg))
- io_apic_eoi(apic, pin);
- else
- io_apic_eoi(apic, vector);
+ io_apic_eoi(apic, vector);
} else {
struct IO_APIC_route_entry entry, entry1;
@@ -606,14 +581,15 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
}
}
-static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
struct irq_pin_list *entry;
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
for_each_irq_pin(entry, cfg->irq_2_pin)
- __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg);
+ x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
+ cfg->vector);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -650,7 +626,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
}
raw_spin_lock_irqsave(&ioapic_lock, flags);
- __eoi_ioapic_pin(apic, pin, entry.vector, NULL);
+ x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -1304,25 +1280,18 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
fasteoi = false;
}
- if (irq_remapped(cfg)) {
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- irq_remap_modify_chip_defaults(chip);
+ if (setup_remapped_irq(irq, cfg, chip))
fasteoi = trigger != 0;
- }
hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
irq_set_chip_and_handler_name(irq, chip, hdl,
fasteoi ? "fasteoi" : "edge");
}
-static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr)
+int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
+ unsigned int destination, int vector,
+ struct io_apic_irq_attr *attr)
{
- if (irq_remapping_enabled)
- return setup_ioapic_remapped_entry(irq, entry, destination,
- vector, attr);
-
memset(entry, 0, sizeof(*entry));
entry->delivery_mode = apic->irq_delivery_mode;
@@ -1370,8 +1339,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
cfg->vector, irq, attr->trigger, attr->polarity, dest);
- if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
- pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+ if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
+ pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
__clear_irq_vector(irq, cfg);
@@ -1479,9 +1448,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
struct IO_APIC_route_entry entry;
unsigned int dest;
- if (irq_remapping_enabled)
- return;
-
memset(&entry, 0, sizeof(entry));
/*
@@ -1513,9 +1479,63 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
ioapic_write_entry(ioapic_idx, pin, entry);
}
-__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
{
int i;
+
+ pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
+
+ for (i = 0; i <= nr_entries; i++) {
+ struct IO_APIC_route_entry entry;
+
+ entry = ioapic_read_entry(apic, i);
+
+ pr_debug(" %02x %02X ", i, entry.dest);
+ pr_cont("%1d %1d %1d %1d %1d "
+ "%1d %1d %02X\n",
+ entry.mask,
+ entry.trigger,
+ entry.irr,
+ entry.polarity,
+ entry.delivery_status,
+ entry.dest_mode,
+ entry.delivery_mode,
+ entry.vector);
+ }
+}
+
+void intel_ir_io_apic_print_entries(unsigned int apic,
+ unsigned int nr_entries)
+{
+ int i;
+
+ pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
+
+ for (i = 0; i <= nr_entries; i++) {
+ struct IR_IO_APIC_route_entry *ir_entry;
+ struct IO_APIC_route_entry entry;
+
+ entry = ioapic_read_entry(apic, i);
+
+ ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
+
+ pr_debug(" %02x %04X ", i, ir_entry->index);
+ pr_cont("%1d %1d %1d %1d %1d "
+ "%1d %1d %X %02X\n",
+ ir_entry->format,
+ ir_entry->mask,
+ ir_entry->trigger,
+ ir_entry->irr,
+ ir_entry->polarity,
+ ir_entry->delivery_status,
+ ir_entry->index2,
+ ir_entry->zero,
+ ir_entry->vector);
+ }
+}
+
+__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+{
union IO_APIC_reg_00 reg_00;
union IO_APIC_reg_01 reg_01;
union IO_APIC_reg_02 reg_02;
@@ -1568,58 +1588,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
printk(KERN_DEBUG ".... IRQ redirection table:\n");
- if (irq_remapping_enabled) {
- printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
- " Pol Stat Indx2 Zero Vect:\n");
- } else {
- printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
- " Stat Dmod Deli Vect:\n");
- }
-
- for (i = 0; i <= reg_01.bits.entries; i++) {
- if (irq_remapping_enabled) {
- struct IO_APIC_route_entry entry;
- struct IR_IO_APIC_route_entry *ir_entry;
-
- entry = ioapic_read_entry(ioapic_idx, i);
- ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
- printk(KERN_DEBUG " %02x %04X ",
- i,
- ir_entry->index
- );
- pr_cont("%1d %1d %1d %1d %1d "
- "%1d %1d %X %02X\n",
- ir_entry->format,
- ir_entry->mask,
- ir_entry->trigger,
- ir_entry->irr,
- ir_entry->polarity,
- ir_entry->delivery_status,
- ir_entry->index2,
- ir_entry->zero,
- ir_entry->vector
- );
- } else {
- struct IO_APIC_route_entry entry;
-
- entry = ioapic_read_entry(ioapic_idx, i);
- printk(KERN_DEBUG " %02x %02X ",
- i,
- entry.dest
- );
- pr_cont("%1d %1d %1d %1d %1d "
- "%1d %1d %02X\n",
- entry.mask,
- entry.trigger,
- entry.irr,
- entry.polarity,
- entry.delivery_status,
- entry.dest_mode,
- entry.delivery_mode,
- entry.vector
- );
- }
- }
+ x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
}
__apicdebuginit(void) print_IO_APICs(void)
@@ -1921,30 +1890,14 @@ void __init enable_IO_APIC(void)
clear_IO_APIC();
}
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
+void native_disable_io_apic(void)
{
/*
- * Clear the IO-APIC before rebooting:
- */
- clear_IO_APIC();
-
- if (!legacy_pic->nr_legacy_irqs)
- return;
-
- /*
* If the i8259 is routed through an IOAPIC
* Put that IOAPIC in virtual wire mode
* so legacy interrupts can be delivered.
- *
- * With interrupt-remapping, for now we will use virtual wire A mode,
- * as virtual wire B is little complex (need to configure both
- * IOAPIC RTE as well as interrupt-remapping table entry).
- * As this gets called during crash dump, keep this simple for now.
*/
- if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) {
+ if (ioapic_i8259.pin != -1) {
struct IO_APIC_route_entry entry;
memset(&entry, 0, sizeof(entry));
@@ -1964,12 +1917,25 @@ void disable_IO_APIC(void)
ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
}
+ if (cpu_has_apic || apic_from_smp_config())
+ disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
/*
- * Use virtual wire A mode when interrupt remapping is enabled.
+ * Clear the IO-APIC before rebooting:
*/
- if (cpu_has_apic || apic_from_smp_config())
- disconnect_bsp_APIC(!irq_remapping_enabled &&
- ioapic_i8259.pin != -1);
+ clear_IO_APIC();
+
+ if (!legacy_pic->nr_legacy_irqs)
+ return;
+
+ x86_io_apic_ops.disable();
}
#ifdef CONFIG_X86_32
@@ -2322,12 +2288,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
apic = entry->apic;
pin = entry->pin;
- /*
- * With interrupt-remapping, destination information comes
- * from interrupt-remapping table entry.
- */
- if (!irq_remapped(cfg))
- io_apic_write(apic, 0x11 + pin*2, dest);
+
+ io_apic_write(apic, 0x11 + pin*2, dest);
reg = io_apic_read(apic, 0x10 + pin*2);
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
reg |= vector;
@@ -2369,9 +2331,10 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
return 0;
}
-static int
-ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
+
+int native_ioapic_set_affinity(struct irq_data *data,
+ const struct cpumask *mask,
+ bool force)
{
unsigned int dest, irq = data->irq;
unsigned long flags;
@@ -2548,33 +2511,6 @@ static void ack_apic_level(struct irq_data *data)
ioapic_irqd_unmask(data, cfg, masked);
}
-#ifdef CONFIG_IRQ_REMAP
-static void ir_ack_apic_edge(struct irq_data *data)
-{
- ack_APIC_irq();
-}
-
-static void ir_ack_apic_level(struct irq_data *data)
-{
- ack_APIC_irq();
- eoi_ioapic_irq(data->irq, data->chip_data);
-}
-
-static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
-{
- seq_printf(p, " IR-%s", data->chip->name);
-}
-
-static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
-{
- chip->irq_print_chip = ir_print_prefix;
- chip->irq_ack = ir_ack_apic_edge;
- chip->irq_eoi = ir_ack_apic_level;
-
- chip->irq_set_affinity = set_remapped_irq_affinity;
-}
-#endif /* CONFIG_IRQ_REMAP */
-
static struct irq_chip ioapic_chip __read_mostly = {
.name = "IO-APIC",
.irq_startup = startup_ioapic_irq,
@@ -2582,7 +2518,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_unmask = unmask_ioapic_irq,
.irq_ack = ack_apic_edge,
.irq_eoi = ack_apic_level,
- .irq_set_affinity = ioapic_set_affinity,
+ .irq_set_affinity = native_ioapic_set_affinity,
.irq_retrigger = ioapic_retrigger_irq,
};
@@ -2781,8 +2717,7 @@ static inline void __init check_timer(void)
* 8259A.
*/
if (pin1 == -1) {
- if (irq_remapping_enabled)
- panic("BIOS bug: timer not connected to IO-APIC");
+ panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
pin1 = pin2;
apic1 = apic2;
no_pin1 = 1;
@@ -2814,8 +2749,7 @@ static inline void __init check_timer(void)
clear_IO_APIC_pin(0, pin1);
goto out;
}
- if (irq_remapping_enabled)
- panic("timer doesn't work through Interrupt-remapped IO-APIC");
+ panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
@@ -2982,37 +2916,58 @@ device_initcall(ioapic_init_ops);
/*
* Dynamic irq allocate and deallocation
*/
-unsigned int create_irq_nr(unsigned int from, int node)
+unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
{
- struct irq_cfg *cfg;
+ struct irq_cfg **cfg;
unsigned long flags;
- unsigned int ret = 0;
- int irq;
+ int irq, i;
if (from < nr_irqs_gsi)
from = nr_irqs_gsi;
- irq = alloc_irq_from(from, node);
- if (irq < 0)
- return 0;
- cfg = alloc_irq_cfg(irq, node);
- if (!cfg) {
- free_irq_at(irq, NULL);
+ cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
+ if (!cfg)
return 0;
+
+ irq = alloc_irqs_from(from, count, node);
+ if (irq < 0)
+ goto out_cfgs;
+
+ for (i = 0; i < count; i++) {
+ cfg[i] = alloc_irq_cfg(irq + i, node);
+ if (!cfg[i])
+ goto out_irqs;
}
raw_spin_lock_irqsave(&vector_lock, flags);
- if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
- ret = irq;
+ for (i = 0; i < count; i++)
+ if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
+ goto out_vecs;
raw_spin_unlock_irqrestore(&vector_lock, flags);
- if (ret) {
- irq_set_chip_data(irq, cfg);
- irq_clear_status_flags(irq, IRQ_NOREQUEST);
- } else {
- free_irq_at(irq, cfg);
+ for (i = 0; i < count; i++) {
+ irq_set_chip_data(irq + i, cfg[i]);
+ irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
}
- return ret;
+
+ kfree(cfg);
+ return irq;
+
+out_vecs:
+ for (i--; i >= 0; i--)
+ __clear_irq_vector(irq + i, cfg[i]);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+out_irqs:
+ for (i = 0; i < count; i++)
+ free_irq_at(irq + i, cfg[i]);
+out_cfgs:
+ kfree(cfg);
+ return 0;
+}
+
+unsigned int create_irq_nr(unsigned int from, int node)
+{
+ return __create_irqs(from, 1, node);
}
int create_irq(void)
@@ -3037,48 +2992,35 @@ void destroy_irq(unsigned int irq)
irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
- if (irq_remapped(cfg))
- free_remapped_irq(irq);
+ free_remapped_irq(irq);
+
raw_spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq, cfg);
raw_spin_unlock_irqrestore(&vector_lock, flags);
free_irq_at(irq, cfg);
}
+void destroy_irqs(unsigned int irq, unsigned int count)
+{
+ unsigned int i;
+
+ for (i = 0; i < count; i++)
+ destroy_irq(irq + i);
+}
+
/*
* MSI message composition
*/
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
- struct msi_msg *msg, u8 hpet_id)
+void native_compose_msi_msg(struct pci_dev *pdev,
+ unsigned int irq, unsigned int dest,
+ struct msi_msg *msg, u8 hpet_id)
{
- struct irq_cfg *cfg;
- int err;
- unsigned dest;
-
- if (disable_apic)
- return -ENXIO;
-
- cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
- if (err)
- return err;
+ struct irq_cfg *cfg = irq_cfg(irq);
- err = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus(), &dest);
- if (err)
- return err;
-
- if (irq_remapped(cfg)) {
- compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
- return err;
- }
+ msg->address_hi = MSI_ADDR_BASE_HI;
if (x2apic_enabled())
- msg->address_hi = MSI_ADDR_BASE_HI |
- MSI_ADDR_EXT_DEST_ID(dest);
- else
- msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
msg->address_lo =
MSI_ADDR_BASE_LO |
@@ -3097,8 +3039,32 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
MSI_DATA_DELIVERY_FIXED:
MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_VECTOR(cfg->vector);
+}
- return err;
+#ifdef CONFIG_PCI_MSI
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+ struct msi_msg *msg, u8 hpet_id)
+{
+ struct irq_cfg *cfg;
+ int err;
+ unsigned dest;
+
+ if (disable_apic)
+ return -ENXIO;
+
+ cfg = irq_cfg(irq);
+ err = assign_irq_vector(irq, cfg, apic->target_cpus());
+ if (err)
+ return err;
+
+ err = apic->cpu_mask_to_apicid_and(cfg->domain,
+ apic->target_cpus(), &dest);
+ if (err)
+ return err;
+
+ x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
+
+ return 0;
}
static int
@@ -3136,23 +3102,28 @@ static struct irq_chip msi_chip = {
.irq_retrigger = ioapic_retrigger_irq,
};
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ unsigned int irq_base, unsigned int irq_offset)
{
struct irq_chip *chip = &msi_chip;
struct msi_msg msg;
+ unsigned int irq = irq_base + irq_offset;
int ret;
ret = msi_compose_msg(dev, irq, &msg, -1);
if (ret < 0)
return ret;
- irq_set_msi_desc(irq, msidesc);
- write_msi_msg(irq, &msg);
+ irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
- if (irq_remapped(irq_get_chip_data(irq))) {
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- irq_remap_modify_chip_defaults(chip);
- }
+ /*
+ * MSI-X message is written per-IRQ, the offset is always 0.
+ * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+ */
+ if (!irq_offset)
+ write_msi_msg(irq, &msg);
+
+ setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
@@ -3163,46 +3134,26 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int node, ret, sub_handle, index = 0;
unsigned int irq, irq_want;
struct msi_desc *msidesc;
+ int node, ret;
- /* x86 doesn't support multiple MSI yet */
+ /* Multiple MSI vectors only supported with interrupt remapping */
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
node = dev_to_node(&dev->dev);
irq_want = nr_irqs_gsi;
- sub_handle = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
irq = create_irq_nr(irq_want, node);
if (irq == 0)
- return -1;
+ return -ENOSPC;
+
irq_want = irq + 1;
- if (!irq_remapping_enabled)
- goto no_ir;
- if (!sub_handle) {
- /*
- * allocate the consecutive block of IRTE's
- * for 'nvec'
- */
- index = msi_alloc_remapped_irq(dev, irq, nvec);
- if (index < 0) {
- ret = index;
- goto error;
- }
- } else {
- ret = msi_setup_remapped_irq(dev, irq, index,
- sub_handle);
- if (ret < 0)
- goto error;
- }
-no_ir:
- ret = setup_msi_irq(dev, msidesc, irq);
+ ret = setup_msi_irq(dev, msidesc, irq, 0);
if (ret < 0)
goto error;
- sub_handle++;
}
return 0;
@@ -3298,26 +3249,19 @@ static struct irq_chip hpet_msi_type = {
.irq_retrigger = ioapic_retrigger_irq,
};
-int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
+int default_setup_hpet_msi(unsigned int irq, unsigned int id)
{
struct irq_chip *chip = &hpet_msi_type;
struct msi_msg msg;
int ret;
- if (irq_remapping_enabled) {
- ret = setup_hpet_msi_remapped(irq, id);
- if (ret)
- return ret;
- }
-
ret = msi_compose_msg(NULL, irq, &msg, id);
if (ret < 0)
return ret;
hpet_msi_write(irq_get_handler_data(irq), &msg);
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- if (irq_remapped(irq_get_chip_data(irq)))
- irq_remap_modify_chip_defaults(chip);
+ setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
return 0;
@@ -3683,10 +3627,7 @@ void __init setup_ioapic_dest(void)
else
mask = apic->target_cpus();
- if (irq_remapping_enabled)
- set_remapped_irq_affinity(idata, mask, false);
- else
- ioapic_set_affinity(idata, mask, false);
+ x86_io_apic_ops.set_affinity(idata, mask, false);
}
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index cce91bf2667..7434d8556d0 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
unsigned long mask = cpumask_bits(cpumask)[0];
unsigned long flags;
- if (WARN_ONCE(!mask, "empty IPI mask"))
+ if (!mask)
return;
local_irq_save(flags);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index e03a1e180e8..562a76d433c 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,18 +20,19 @@ static int set_x2apic_phys_mode(char *arg)
}
early_param("x2apic_phys", set_x2apic_phys_mode);
-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static bool x2apic_fadt_phys(void)
{
- if (x2apic_phys)
- return x2apic_enabled();
- else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) &&
- x2apic_enabled()) {
+ if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
printk(KERN_DEBUG "System requires x2apic physical mode\n");
- return 1;
+ return true;
}
- else
- return 0;
+ return false;
+}
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
}
static void
@@ -82,7 +83,7 @@ static void init_x2apic_ldr(void)
static int x2apic_phys_probe(void)
{
- if (x2apic_mode && x2apic_phys)
+ if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
return 1;
return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8cfade9510a..794f6eb54cd 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
*
* SGI UV APIC functions (note: not an Intel compatible APIC)
*
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
*/
#include <linux/cpumask.h>
#include <linux/hardirq.h>
@@ -91,10 +91,16 @@ static int __init early_get_pnodeid(void)
m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
uv_min_hub_revision_id = node_id.s.revision;
- if (node_id.s.part_number == UV2_HUB_PART_NUMBER)
- uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
- if (node_id.s.part_number == UV2_HUB_PART_NUMBER_X)
+ switch (node_id.s.part_number) {
+ case UV2_HUB_PART_NUMBER:
+ case UV2_HUB_PART_NUMBER_X:
uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
+ break;
+ case UV3_HUB_PART_NUMBER:
+ case UV3_HUB_PART_NUMBER_X:
+ uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1;
+ break;
+ }
uv_hub_info->hub_revision = uv_min_hub_revision_id;
pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
@@ -130,13 +136,16 @@ static void __init uv_set_apicid_hibit(void)
static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
- int pnodeid, is_uv1, is_uv2;
+ int pnodeid, is_uv1, is_uv2, is_uv3;
is_uv1 = !strcmp(oem_id, "SGI");
is_uv2 = !strcmp(oem_id, "SGI2");
- if (is_uv1 || is_uv2) {
+ is_uv3 = !strncmp(oem_id, "SGI3", 4); /* there are varieties of UV3 */
+ if (is_uv1 || is_uv2 || is_uv3) {
uv_hub_info->hub_revision =
- is_uv1 ? UV1_HUB_REVISION_BASE : UV2_HUB_REVISION_BASE;
+ (is_uv1 ? UV1_HUB_REVISION_BASE :
+ (is_uv2 ? UV2_HUB_REVISION_BASE :
+ UV3_HUB_REVISION_BASE));
pnodeid = early_get_pnodeid();
early_get_apic_pnode_shift();
x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
@@ -450,14 +459,17 @@ static __init void map_high(char *id, unsigned long base, int pshift,
paddr = base << pshift;
bytes = (1UL << bshift) * (max_pnode + 1);
- printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
- paddr + bytes);
+ if (!paddr) {
+ pr_info("UV: Map %s_HI base address NULL\n", id);
+ return;
+ }
+ pr_info("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
if (map_type == map_uc)
init_extra_mapping_uc(paddr, bytes);
else
init_extra_mapping_wb(paddr, bytes);
-
}
+
static __init void map_gru_high(int max_pnode)
{
union uvh_rh_gam_gru_overlay_config_mmr_u gru;
@@ -468,7 +480,8 @@ static __init void map_gru_high(int max_pnode)
map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
gru_start_paddr = ((u64)gru.s.base << shift);
gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
-
+ } else {
+ pr_info("UV: GRU disabled\n");
}
}
@@ -480,23 +493,146 @@ static __init void map_mmr_high(int max_pnode)
mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
if (mmr.s.enable)
map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
+ else
+ pr_info("UV: MMR disabled\n");
+}
+
+/*
+ * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY
+ * and REDIRECT MMR regs are exactly the same on UV3.
+ */
+struct mmioh_config {
+ unsigned long overlay;
+ unsigned long redirect;
+ char *id;
+};
+
+static __initdata struct mmioh_config mmiohs[] = {
+ {
+ UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR,
+ UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR,
+ "MMIOH0"
+ },
+ {
+ UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR,
+ UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR,
+ "MMIOH1"
+ },
+};
+
+static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
+{
+ union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
+ unsigned long mmr;
+ unsigned long base;
+ int i, n, shift, m_io, max_io;
+ int nasid, lnasid, fi, li;
+ char *id;
+
+ id = mmiohs[index].id;
+ overlay.v = uv_read_local_mmr(mmiohs[index].overlay);
+ pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n",
+ id, overlay.v, overlay.s3.base, overlay.s3.m_io);
+ if (!overlay.s3.enable) {
+ pr_info("UV: %s disabled\n", id);
+ return;
+ }
+
+ shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT;
+ base = (unsigned long)overlay.s3.base;
+ m_io = overlay.s3.m_io;
+ mmr = mmiohs[index].redirect;
+ n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
+ min_pnode *= 2; /* convert to NASID */
+ max_pnode *= 2;
+ max_io = lnasid = fi = li = -1;
+
+ for (i = 0; i < n; i++) {
+ union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect;
+
+ redirect.v = uv_read_local_mmr(mmr + i * 8);
+ nasid = redirect.s3.nasid;
+ if (nasid < min_pnode || max_pnode < nasid)
+ nasid = -1; /* invalid NASID */
+
+ if (nasid == lnasid) {
+ li = i;
+ if (i != n-1) /* last entry check */
+ continue;
+ }
+
+ /* check if we have a cached (or last) redirect to print */
+ if (lnasid != -1 || (i == n-1 && nasid != -1)) {
+ unsigned long addr1, addr2;
+ int f, l;
+
+ if (lnasid == -1) {
+ f = l = i;
+ lnasid = nasid;
+ } else {
+ f = fi;
+ l = li;
+ }
+ addr1 = (base << shift) +
+ f * (unsigned long)(1 << m_io);
+ addr2 = (base << shift) +
+ (l + 1) * (unsigned long)(1 << m_io);
+ pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
+ id, fi, li, lnasid, addr1, addr2);
+ if (max_io < l)
+ max_io = l;
+ }
+ fi = li = i;
+ lnasid = nasid;
+ }
+
+ pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n",
+ id, base, shift, m_io, max_io);
+
+ if (max_io >= 0)
+ map_high(id, base, shift, m_io, max_io, map_uc);
}
-static __init void map_mmioh_high(int max_pnode)
+static __init void map_mmioh_high(int min_pnode, int max_pnode)
{
union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
- int shift;
+ unsigned long mmr, base;
+ int shift, enable, m_io, n_io;
- mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
- if (is_uv1_hub() && mmioh.s1.enable) {
- shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
- map_high("MMIOH", mmioh.s1.base, shift, mmioh.s1.m_io,
- max_pnode, map_uc);
+ if (is_uv3_hub()) {
+ /* Map both MMIOH Regions */
+ map_mmioh_high_uv3(0, min_pnode, max_pnode);
+ map_mmioh_high_uv3(1, min_pnode, max_pnode);
+ return;
}
- if (is_uv2_hub() && mmioh.s2.enable) {
+
+ if (is_uv1_hub()) {
+ mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
+ shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+ mmioh.v = uv_read_local_mmr(mmr);
+ enable = !!mmioh.s1.enable;
+ base = mmioh.s1.base;
+ m_io = mmioh.s1.m_io;
+ n_io = mmioh.s1.n_io;
+ } else if (is_uv2_hub()) {
+ mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
- map_high("MMIOH", mmioh.s2.base, shift, mmioh.s2.m_io,
- max_pnode, map_uc);
+ mmioh.v = uv_read_local_mmr(mmr);
+ enable = !!mmioh.s2.enable;
+ base = mmioh.s2.base;
+ m_io = mmioh.s2.m_io;
+ n_io = mmioh.s2.n_io;
+ } else
+ return;
+
+ if (enable) {
+ max_pnode &= (1 << n_io) - 1;
+ pr_info(
+ "UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n",
+ base, shift, m_io, n_io, max_pnode);
+ map_high("MMIOH", base, shift, m_io, max_pnode, map_uc);
+ } else {
+ pr_info("UV: MMIOH disabled\n");
}
}
@@ -724,42 +860,41 @@ void uv_nmi_init(void)
void __init uv_system_init(void)
{
union uvh_rh_gam_config_mmr_u m_n_config;
- union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
union uvh_node_id_u node_id;
unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
- int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
- int gnode_extra, max_pnode = 0;
+ int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+ int gnode_extra, min_pnode = 999999, max_pnode = -1;
unsigned long mmr_base, present, paddr;
- unsigned short pnode_mask, pnode_io_mask;
+ unsigned short pnode_mask;
+ char *hub = (is_uv1_hub() ? "UV1" :
+ (is_uv2_hub() ? "UV2" :
+ "UV3"));
- printk(KERN_INFO "UV: Found %s hub\n", is_uv1_hub() ? "UV1" : "UV2");
+ pr_info("UV: Found %s hub\n", hub);
map_low_mmrs();
m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
m_val = m_n_config.s.m_skt;
n_val = m_n_config.s.n_skt;
- mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
- n_io = is_uv1_hub() ? mmioh.s1.n_io : mmioh.s2.n_io;
+ pnode_mask = (1 << n_val) - 1;
mmr_base =
uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
~UV_MMR_ENABLE;
- pnode_mask = (1 << n_val) - 1;
- pnode_io_mask = (1 << n_io) - 1;
node_id.v = uv_read_local_mmr(UVH_NODE_ID);
gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
gnode_upper = ((unsigned long)gnode_extra << m_val);
- printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
- n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);
+ pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x\n",
+ n_val, m_val, pnode_mask, gnode_upper, gnode_extra);
- printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
+ pr_info("UV: global MMR base 0x%lx\n", mmr_base);
for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
uv_possible_blades +=
hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
/* uv_num_possible_blades() is really the hub count */
- printk(KERN_INFO "UV: Found %d blades, %d hubs\n",
+ pr_info("UV: Found %d blades, %d hubs\n",
is_uv1_hub() ? uv_num_possible_blades() :
(uv_num_possible_blades() + 1) / 2,
uv_num_possible_blades());
@@ -794,6 +929,7 @@ void __init uv_system_init(void)
uv_blade_info[blade].nr_possible_cpus = 0;
uv_blade_info[blade].nr_online_cpus = 0;
spin_lock_init(&uv_blade_info[blade].nmi_lock);
+ min_pnode = min(pnode, min_pnode);
max_pnode = max(pnode, max_pnode);
blade++;
}
@@ -856,7 +992,7 @@ void __init uv_system_init(void)
map_gru_high(max_pnode);
map_mmr_high(max_pnode);
- map_mmioh_high(max_pnode & pnode_io_mask);
+ map_mmioh_high(min_pnode, max_pnode);
uv_cpu_init();
uv_scir_register_cpu_notifier();
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index d65464e4350..66b5faffe14 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -232,6 +232,7 @@
#include <linux/acpi.h>
#include <linux/syscore_ops.h>
#include <linux/i8253.h>
+#include <linux/cpuidle.h>
#include <asm/uaccess.h>
#include <asm/desc.h>
@@ -360,13 +361,35 @@ struct apm_user {
* idle percentage above which bios idle calls are done
*/
#ifdef CONFIG_APM_CPU_IDLE
-#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012
#define DEFAULT_IDLE_THRESHOLD 95
#else
#define DEFAULT_IDLE_THRESHOLD 100
#endif
#define DEFAULT_IDLE_PERIOD (100 / 3)
+static int apm_cpu_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index);
+
+static struct cpuidle_driver apm_idle_driver = {
+ .name = "apm_idle",
+ .owner = THIS_MODULE,
+ .en_core_tk_irqen = 1,
+ .states = {
+ { /* entry 0 is for polling */ },
+ { /* entry 1 is for APM idle */
+ .name = "APM",
+ .desc = "APM idle",
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 250, /* WAG */
+ .target_residency = 500, /* WAG */
+ .enter = &apm_cpu_idle
+ },
+ },
+ .state_count = 2,
+};
+
+static struct cpuidle_device apm_cpuidle_device;
+
/*
* Local variables
*/
@@ -377,7 +400,6 @@ static struct {
static int clock_slowed;
static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
-static int set_pm_idle;
static int suspends_pending;
static int standbys_pending;
static int ignore_sys_suspend;
@@ -884,8 +906,6 @@ static void apm_do_busy(void)
#define IDLE_CALC_LIMIT (HZ * 100)
#define IDLE_LEAKY_MAX 16
-static void (*original_pm_idle)(void) __read_mostly;
-
/**
* apm_cpu_idle - cpu idling for APM capable Linux
*
@@ -894,35 +914,36 @@ static void (*original_pm_idle)(void) __read_mostly;
* Furthermore it calls the system default idle routine.
*/
-static void apm_cpu_idle(void)
+static int apm_cpu_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
{
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
static unsigned int last_stime; /* = 0 */
+ cputime_t stime;
int apm_idle_done = 0;
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
unsigned int bucket;
- WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
recalc:
+ task_cputime(current, NULL, &stime);
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
- last_jiffies = jiffies;
- last_stime = current->stime;
} else if (jiffies_since_last_check > idle_period) {
unsigned int idle_percentage;
- idle_percentage = current->stime - last_stime;
+ idle_percentage = stime - last_stime;
idle_percentage *= 100;
idle_percentage /= jiffies_since_last_check;
use_apm_idle = (idle_percentage > idle_threshold);
if (apm_info.forbid_idle)
use_apm_idle = 0;
- last_jiffies = jiffies;
- last_stime = current->stime;
}
+ last_jiffies = jiffies;
+ last_stime = stime;
+
bucket = IDLE_LEAKY_MAX;
while (!need_resched()) {
@@ -950,10 +971,7 @@ recalc:
break;
}
}
- if (original_pm_idle)
- original_pm_idle();
- else
- default_idle();
+ default_idle();
local_irq_disable();
jiffies_since_last_check = jiffies - last_jiffies;
if (jiffies_since_last_check > idle_period)
@@ -963,7 +981,7 @@ recalc:
if (apm_idle_done)
apm_do_busy();
- local_irq_enable();
+ return index;
}
/**
@@ -2381,9 +2399,9 @@ static int __init apm_init(void)
if (HZ != 100)
idle_period = (idle_period * HZ) / 100;
if (idle_threshold < 100) {
- original_pm_idle = pm_idle;
- pm_idle = apm_cpu_idle;
- set_pm_idle = 1;
+ if (!cpuidle_register_driver(&apm_idle_driver))
+ if (cpuidle_register_device(&apm_cpuidle_device))
+ cpuidle_unregister_driver(&apm_idle_driver);
}
return 0;
@@ -2393,15 +2411,9 @@ static void __exit apm_exit(void)
{
int error;
- if (set_pm_idle) {
- pm_idle = original_pm_idle;
- /*
- * We are about to unload the current idle thread pm callback
- * (pm_idle), Wait for all processors to update cached/local
- * copies of pm_idle before proceeding.
- */
- kick_all_cpus_sync();
- }
+ cpuidle_unregister_device(&apm_cpuidle_device);
+ cpuidle_unregister_driver(&apm_idle_driver);
+
if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
&& (apm_info.connection_version > 0x0100)) {
error = apm_engage_power_management(APM_DEVICE_ALL, 0);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 15239fffd6f..fa96eb0d02f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -12,7 +12,6 @@
#include <asm/pci-direct.h>
#ifdef CONFIG_X86_64
-# include <asm/numa_64.h>
# include <asm/mmconfig.h>
# include <asm/cacheflush.h>
#endif
@@ -220,8 +219,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
*/
WARN_ONCE(1, "WARNING: This combination of AMD"
" processors is not suitable for SMP.\n");
- if (!test_taint(TAINT_UNSAFE_SMP))
- add_taint(TAINT_UNSAFE_SMP);
+ add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE);
valid_k7:
;
@@ -364,9 +362,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
#endif
}
-int amd_get_nb_id(int cpu)
+u16 amd_get_nb_id(int cpu)
{
- int id = 0;
+ u16 id = 0;
#ifdef CONFIG_SMP
id = per_cpu(cpu_llc_id, cpu);
#endif
@@ -518,10 +516,9 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
u32 dummy;
-
-#ifdef CONFIG_SMP
unsigned long long value;
+#ifdef CONFIG_SMP
/*
* Disable TLB flush filter by setting HWCR.FFDIS on K8
* bit 6 of msr C001_0015
@@ -559,12 +556,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
* (AMD Erratum #110, docId: 25759).
*/
if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
- u64 val;
-
clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
- if (!rdmsrl_amd_safe(0xc001100d, &val)) {
- val &= ~(1ULL << 32);
- wrmsrl_amd_safe(0xc001100d, val);
+ if (!rdmsrl_amd_safe(0xc001100d, &value)) {
+ value &= ~(1ULL << 32);
+ wrmsrl_amd_safe(0xc001100d, value);
}
}
@@ -617,13 +612,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
if ((c->x86 == 0x15) &&
(c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
!cpu_has(c, X86_FEATURE_TOPOEXT)) {
- u64 val;
- if (!rdmsrl_safe(0xc0011005, &val)) {
- val |= 1ULL << 54;
- wrmsrl_safe(0xc0011005, val);
- rdmsrl(0xc0011005, val);
- if (val & (1ULL << 54)) {
+ if (!rdmsrl_safe(0xc0011005, &value)) {
+ value |= 1ULL << 54;
+ wrmsrl_safe(0xc0011005, value);
+ rdmsrl(0xc0011005, value);
+ if (value & (1ULL << 54)) {
set_cpu_cap(c, X86_FEATURE_TOPOEXT);
printk(KERN_INFO FW_INFO "CPU: Re-enabling "
"disabled Topology Extensions Support\n");
@@ -637,11 +631,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
*/
if ((c->x86 == 0x15) &&
(c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
- u64 val;
- if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) {
- val |= 0x1E;
- wrmsrl_safe(0xc0011021, val);
+ if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) {
+ value |= 0x1E;
+ wrmsrl_safe(0xc0011021, value);
}
}
@@ -685,12 +678,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
* benefit in doing so.
*/
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+ unsigned long pfn = tseg >> PAGE_SHIFT;
+
printk(KERN_DEBUG "tseg: %010llx\n", tseg);
- if ((tseg>>PMD_SHIFT) <
- (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
- ((tseg>>PMD_SHIFT) <
- (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
- (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
+ if (pfn_range_is_mapped(pfn, pfn + 1))
set_memory_4k((unsigned long)__va(tseg), 1);
}
}
@@ -703,13 +694,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
if (c->x86 > 0x11)
set_cpu_cap(c, X86_FEATURE_ARAT);
- /*
- * Disable GART TLB Walk Errors on Fam10h. We do this here
- * because this is always needed when GART is enabled, even in a
- * kernel which has no MCE support built in.
- */
if (c->x86 == 0x10) {
/*
+ * Disable GART TLB Walk Errors on Fam10h. We do this here
+ * because this is always needed when GART is enabled, even in a
+ * kernel which has no MCE support built in.
* BIOS should disable GartTlbWlk Errors themself. If
* it doesn't do it here as suggested by the BKDG.
*
@@ -723,6 +712,21 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
mask |= (1 << 10);
wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask);
}
+
+ /*
+ * On family 10h BIOS may not have properly enabled WC+ support,
+ * causing it to be converted to CD memtype. This may result in
+ * performance degradation for certain nested-paging guests.
+ * Prevent this conversion by clearing bit 24 in
+ * MSR_AMD64_BU_CFG2.
+ *
+ * NOTE: we want to use the _safe accessors so as not to #GP kvm
+ * guests on older kvm hosts.
+ */
+
+ rdmsrl_safe(MSR_AMD64_BU_CFG2, &value);
+ value &= ~(1ULL << 24);
+ wrmsrl_safe(MSR_AMD64_BU_CFG2, value);
}
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 92dfec986a4..af6455e3fcc 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -17,15 +17,6 @@
#include <asm/paravirt.h>
#include <asm/alternative.h>
-static int __init no_halt(char *s)
-{
- WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n");
- boot_cpu_data.hlt_works_ok = 0;
- return 1;
-}
-
-__setup("no-hlt", no_halt);
-
static int __init no_387(char *s)
{
boot_cpu_data.hard_math = 0;
@@ -89,23 +80,6 @@ static void __init check_fpu(void)
pr_warn("Hmm, FPU with FDIV bug\n");
}
-static void __init check_hlt(void)
-{
- if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
- return;
-
- pr_info("Checking 'hlt' instruction... ");
- if (!boot_cpu_data.hlt_works_ok) {
- pr_cont("disabled\n");
- return;
- }
- halt();
- halt();
- halt();
- halt();
- pr_cont("OK\n");
-}
-
/*
* Check whether we are able to run this kernel safely on SMP.
*
@@ -129,7 +103,6 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
#endif
check_config();
- check_hlt();
init_utsname()->machine[1] =
'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
alternative_instructions();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9c3ab43a695..d814772c5be 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -37,6 +37,8 @@
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/pat.h>
+#include <asm/microcode.h>
+#include <asm/microcode_intel.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
@@ -213,7 +215,7 @@ static inline int flag_is_changeable_p(u32 flag)
}
/* Probe for the CPUID instruction */
-static int __cpuinit have_cpuid_p(void)
+int __cpuinit have_cpuid_p(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
@@ -249,11 +251,6 @@ static inline int flag_is_changeable_p(u32 flag)
{
return 1;
}
-/* Probe for the CPUID instruction */
-static inline int have_cpuid_p(void)
-{
- return 1;
-}
static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
{
}
@@ -1223,6 +1220,12 @@ void __cpuinit cpu_init(void)
int cpu;
int i;
+ /*
+ * Load microcode on this cpu if a valid microcode is available.
+ * This is early microcode loading procedure.
+ */
+ load_ucode_ap();
+
cpu = stack_smp_processor_id();
t = &per_cpu(init_tss, cpu);
oist = &per_cpu(orig_ist, cpu);
@@ -1314,6 +1317,8 @@ void __cpuinit cpu_init(void)
struct tss_struct *t = &per_cpu(init_tss, cpu);
struct thread_struct *thread = &curr->thread;
+ show_ucode_info_early();
+
if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
for (;;)
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index a8f8fa9769d..1e7e84a02eb 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -79,3 +79,10 @@ void __init init_hypervisor_platform(void)
if (x86_hyper->init_platform)
x86_hyper->init_platform();
}
+
+bool __init hypervisor_x2apic_available(void)
+{
+ return x86_hyper &&
+ x86_hyper->x2apic_available &&
+ x86_hyper->x2apic_available();
+}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fcaabd0432c..1905ce98bee 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -17,7 +17,6 @@
#ifdef CONFIG_X86_64
#include <linux/topology.h>
-#include <asm/numa_64.h>
#endif
#include "cpu.h"
@@ -168,7 +167,7 @@ int __cpuinit ppro_with_ram_bug(void)
#ifdef CONFIG_X86_F00F_BUG
static void __cpuinit trap_init_f00f_bug(void)
{
- __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
+ __set_fixmap(FIX_F00F_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
/*
* Update the IDT descriptor and reload the IDT so that
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fe9edec6698..7c6f7d548c0 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -298,8 +298,7 @@ struct _cache_attr {
unsigned int);
};
-#ifdef CONFIG_AMD_NB
-
+#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
/*
* L3 cache descriptors
*/
@@ -524,9 +523,9 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
static struct _cache_attr subcaches =
__ATTR(subcaches, 0644, show_subcaches, store_subcaches);
-#else /* CONFIG_AMD_NB */
+#else
#define amd_init_l3_cache(x, y)
-#endif /* CONFIG_AMD_NB */
+#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */
static int
__cpuinit cpuid4_cache_lookup_regs(int index,
@@ -1227,7 +1226,7 @@ static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
.notifier_call = cacheinfo_cpu_callback,
};
-static int __cpuinit cache_sysfs_init(void)
+static int __init cache_sysfs_init(void)
{
int i;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 80dbda84f1c..7bc126346ac 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -512,11 +512,8 @@ int mce_available(struct cpuinfo_x86 *c)
static void mce_schedule_work(void)
{
- if (!mce_ring_empty()) {
- struct work_struct *work = &__get_cpu_var(mce_work);
- if (!work_pending(work))
- schedule_work(work);
- }
+ if (!mce_ring_empty())
+ schedule_work(&__get_cpu_var(mce_work));
}
DEFINE_PER_CPU(struct irq_work, mce_irq_work);
@@ -1085,7 +1082,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
/*
* Set taint even when machine check was not enabled.
*/
- add_taint(TAINT_MACHINE_CHECK);
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
severity = mce_severity(&m, cfg->tolerant, NULL);
@@ -1351,12 +1348,7 @@ int mce_notify_irq(void)
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
- /*
- * There is no risk of missing notifications because
- * work_pending is always cleared before the function is
- * executed.
- */
- if (mce_helper[0] && !work_pending(&mce_trigger_work))
+ if (mce_helper[0])
schedule_work(&mce_trigger_work);
if (__ratelimit(&ratelimit))
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 2d5454cd2c4..1c044b1ccc5 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -33,7 +33,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
smp_processor_id());
}
- add_taint(TAINT_MACHINE_CHECK);
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
}
/* Set up machine check reporting for processors with Intel style MCE: */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 2d7998fb628..e9a701aecaa 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -15,7 +15,7 @@
static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
- add_taint(TAINT_MACHINE_CHECK);
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
}
/* Set up machine check reporting on the Winchip C6 series */
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 0a630dd4b62..a7d26d83fb7 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -14,10 +14,15 @@
#include <linux/time.h>
#include <linux/clocksource.h>
#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/interrupt.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
+#include <asm/desc.h>
+#include <asm/idle.h>
+#include <asm/irq_regs.h>
struct ms_hyperv_info ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -30,6 +35,13 @@ static bool __init ms_hyperv_platform(void)
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
return false;
+ /*
+ * Xen emulates Hyper-V to support enlightened Windows.
+ * Check to see first if we are on a Xen Hypervisor.
+ */
+ if (xen_cpuid_base())
+ return false;
+
cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
&eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
@@ -68,7 +80,14 @@ static void __init ms_hyperv_init_platform(void)
printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
ms_hyperv.features, ms_hyperv.hints);
- clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
+ if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
+ clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
+#if IS_ENABLED(CONFIG_HYPERV)
+ /*
+ * Setup the IDT for hypervisor callback.
+ */
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
+#endif
}
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
@@ -77,3 +96,36 @@ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
.init_platform = ms_hyperv_init_platform,
};
EXPORT_SYMBOL(x86_hyper_ms_hyperv);
+
+#if IS_ENABLED(CONFIG_HYPERV)
+static int vmbus_irq = -1;
+static irq_handler_t vmbus_isr;
+
+void hv_register_vmbus_handler(int irq, irq_handler_t handler)
+{
+ vmbus_irq = irq;
+ vmbus_isr = handler;
+}
+
+void hyperv_vector_handler(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ struct irq_desc *desc;
+
+ irq_enter();
+ exit_idle();
+
+ desc = irq_to_desc(vmbus_irq);
+
+ if (desc)
+ generic_handle_irq_desc(vmbus_irq, desc);
+
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+#else
+void hv_register_vmbus_handler(int irq, irq_handler_t handler)
+{
+}
+#endif
+EXPORT_SYMBOL_GPL(hv_register_vmbus_handler);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index e9fe907cd24..fa72a39e5d4 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -542,7 +542,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
if (tmp != mask_lo) {
printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
- add_taint(TAINT_FIRMWARE_WORKAROUND);
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
mask_lo = tmp;
}
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4428fd178bc..bf0f01aea99 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -340,9 +340,6 @@ int x86_setup_perfctr(struct perf_event *event)
/* BTS is currently only allowed for user-mode. */
if (!attr->exclude_kernel)
return -EOPNOTSUPP;
-
- if (!attr->exclude_guest)
- return -EOPNOTSUPP;
}
hwc->config |= config;
@@ -385,9 +382,6 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip) {
int precise = 0;
- if (!event->attr.exclude_guest)
- return -EOPNOTSUPP;
-
/* Support for constant skid */
if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
precise++;
@@ -835,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
} else {
hwc->config_base = x86_pmu_config_addr(hwc->idx);
hwc->event_base = x86_pmu_event_addr(hwc->idx);
- hwc->event_base_rdpmc = hwc->idx;
+ hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
}
}
@@ -1316,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = {
.attrs = NULL,
};
-struct perf_pmu_events_attr {
- struct device_attribute attr;
- u64 id;
-};
-
/*
* Remove all undefined events (x86_pmu.event_map(id) == 0)
* out of events_attr attributes.
@@ -1354,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
-#define EVENT_ATTR(_name, _id) \
-static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
- .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
- .id = PERF_COUNT_HW_##_id, \
-};
+#define EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \
+ events_sysfs_show)
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 115c1ea9774..7f5c75c2afd 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -325,6 +325,8 @@ struct x86_pmu {
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
unsigned perfctr;
+ int (*addr_offset)(int index, bool eventsel);
+ int (*rdpmc_index)(int index);
u64 (*event_map)(int);
int max_events;
int num_counters;
@@ -446,28 +448,21 @@ extern u64 __read_mostly hw_cache_extra_regs
u64 x86_perf_event_update(struct perf_event *event);
-static inline int x86_pmu_addr_offset(int index)
+static inline unsigned int x86_pmu_config_addr(int index)
{
- int offset;
-
- /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
- alternative_io(ASM_NOP2,
- "shll $1, %%eax",
- X86_FEATURE_PERFCTR_CORE,
- "=a" (offset),
- "a" (index));
-
- return offset;
+ return x86_pmu.eventsel + (x86_pmu.addr_offset ?
+ x86_pmu.addr_offset(index, true) : index);
}
-static inline unsigned int x86_pmu_config_addr(int index)
+static inline unsigned int x86_pmu_event_addr(int index)
{
- return x86_pmu.eventsel + x86_pmu_addr_offset(index);
+ return x86_pmu.perfctr + (x86_pmu.addr_offset ?
+ x86_pmu.addr_offset(index, false) : index);
}
-static inline unsigned int x86_pmu_event_addr(int index)
+static inline int x86_pmu_rdpmc_index(int index)
{
- return x86_pmu.perfctr + x86_pmu_addr_offset(index);
+ return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
}
int x86_setup_perfctr(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c93bc4e813a..dfdab42aed2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,21 +132,102 @@ static u64 amd_pmu_event_map(int hw_event)
return amd_perfmon_event_map[hw_event];
}
-static int amd_pmu_hw_config(struct perf_event *event)
+static struct event_constraint *amd_nb_event_constraint;
+
+/*
+ * Previously calculated offsets
+ */
+static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
+static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
+static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
+
+/*
+ * Legacy CPUs:
+ * 4 counters starting at 0xc0010000 each offset by 1
+ *
+ * CPUs with core performance counter extensions:
+ * 6 counters starting at 0xc0010200 each offset by 2
+ *
+ * CPUs with north bridge performance counter extensions:
+ * 4 additional counters starting at 0xc0010240 each offset by 2
+ * (indexed right above either one of the above core counters)
+ */
+static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
- int ret;
+ int offset, first, base;
- /* pass precise event sampling to ibs: */
- if (event->attr.precise_ip && get_ibs_caps())
- return -ENOENT;
+ if (!index)
+ return index;
+
+ if (eventsel)
+ offset = event_offsets[index];
+ else
+ offset = count_offsets[index];
+
+ if (offset)
+ return offset;
+
+ if (amd_nb_event_constraint &&
+ test_bit(index, amd_nb_event_constraint->idxmsk)) {
+ /*
+ * calculate the offset of NB counters with respect to
+ * base eventsel or perfctr
+ */
+
+ first = find_first_bit(amd_nb_event_constraint->idxmsk,
+ X86_PMC_IDX_MAX);
+
+ if (eventsel)
+ base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
+ else
+ base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
+
+ offset = base + ((index - first) << 1);
+ } else if (!cpu_has_perfctr_core)
+ offset = index;
+ else
+ offset = index << 1;
+
+ if (eventsel)
+ event_offsets[index] = offset;
+ else
+ count_offsets[index] = offset;
+
+ return offset;
+}
+
+static inline int amd_pmu_rdpmc_index(int index)
+{
+ int ret, first;
+
+ if (!index)
+ return index;
+
+ ret = rdpmc_indexes[index];
- ret = x86_pmu_hw_config(event);
if (ret)
return ret;
- if (has_branch_stack(event))
- return -EOPNOTSUPP;
+ if (amd_nb_event_constraint &&
+ test_bit(index, amd_nb_event_constraint->idxmsk)) {
+ /*
+ * according to the mnual, ECX value of the NB counters is
+ * the index of the NB counter (0, 1, 2 or 3) plus 6
+ */
+
+ first = find_first_bit(amd_nb_event_constraint->idxmsk,
+ X86_PMC_IDX_MAX);
+ ret = index - first + 6;
+ } else
+ ret = index;
+
+ rdpmc_indexes[index] = ret;
+
+ return ret;
+}
+static int amd_core_hw_config(struct perf_event *event)
+{
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -156,14 +237,37 @@ static int amd_pmu_hw_config(struct perf_event *event)
event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
ARCH_PERFMON_EVENTSEL_OS);
else if (event->attr.exclude_host)
- event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
+ event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
else if (event->attr.exclude_guest)
- event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
+ event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
+
+ return 0;
+}
+
+/*
+ * NB counters do not support the following event select bits:
+ * Host/Guest only
+ * Counter mask
+ * Invert counter mask
+ * Edge detect
+ * OS/User mode
+ */
+static int amd_nb_hw_config(struct perf_event *event)
+{
+ /* for NB, we only allow system wide counting mode */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
- if (event->attr.type != PERF_TYPE_RAW)
- return 0;
+ event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+ ARCH_PERFMON_EVENTSEL_OS);
- event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+ if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
+ ARCH_PERFMON_EVENTSEL_INT))
+ return -EINVAL;
return 0;
}
@@ -181,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
return (hwc->config & 0xe0) == 0xe0;
}
+static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
+{
+ return amd_nb_event_constraint && amd_is_nb_event(hwc);
+}
+
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
struct amd_nb *nb = cpuc->amd_nb;
@@ -188,20 +297,37 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
return nb && nb->nb_id != -1;
}
-static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
+static int amd_pmu_hw_config(struct perf_event *event)
+{
+ int ret;
+
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ ret = x86_pmu_hw_config(event);
+ if (ret)
+ return ret;
+
+ if (event->attr.type == PERF_TYPE_RAW)
+ event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+ if (amd_is_perfctr_nb_event(&event->hw))
+ return amd_nb_hw_config(event);
+
+ return amd_core_hw_config(event);
+}
+
+static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
{
- struct hw_perf_event *hwc = &event->hw;
struct amd_nb *nb = cpuc->amd_nb;
int i;
/*
- * only care about NB events
- */
- if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
- return;
-
- /*
* need to scan whole list because event may not have
* been assigned during scheduling
*
@@ -215,6 +341,19 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
}
}
+static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
+{
+ int core_id = cpu_data(smp_processor_id()).cpu_core_id;
+
+ /* deliver interrupts only to this core */
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
+ hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
+ hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
+ hwc->config |= (u64)(core_id) <<
+ AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
+ }
+}
+
/*
* AMD64 NorthBridge events need special treatment because
* counter access needs to be synchronized across all cores
@@ -247,24 +386,24 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
*
* Given that resources are allocated (cmpxchg), they must be
* eventually freed for others to use. This is accomplished by
- * calling amd_put_event_constraints().
+ * calling __amd_put_nb_event_constraints()
*
* Non NB events are not impacted by this restriction.
*/
static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+ struct event_constraint *c)
{
struct hw_perf_event *hwc = &event->hw;
struct amd_nb *nb = cpuc->amd_nb;
- struct perf_event *old = NULL;
- int max = x86_pmu.num_counters;
- int i, j, k = -1;
+ struct perf_event *old;
+ int idx, new = -1;
- /*
- * if not NB event or no NB, then no constraints
- */
- if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
- return &unconstrained;
+ if (!c)
+ c = &unconstrained;
+
+ if (cpuc->is_fake)
+ return c;
/*
* detect if already present, if so reuse
@@ -276,48 +415,36 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
* because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable()
*/
- for (i = 0; i < max; i++) {
- /*
- * keep track of first free slot
- */
- if (k == -1 && !nb->owners[i])
- k = i;
+ for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+ if (new == -1 || hwc->idx == idx)
+ /* assign free slot, prefer hwc->idx */
+ old = cmpxchg(nb->owners + idx, NULL, event);
+ else if (nb->owners[idx] == event)
+ /* event already present */
+ old = event;
+ else
+ continue;
+
+ if (old && old != event)
+ continue;
+
+ /* reassign to this slot */
+ if (new != -1)
+ cmpxchg(nb->owners + new, event, NULL);
+ new = idx;
/* already present, reuse */
- if (nb->owners[i] == event)
- goto done;
- }
- /*
- * not present, so grab a new slot
- * starting either at:
- */
- if (hwc->idx != -1) {
- /* previous assignment */
- i = hwc->idx;
- } else if (k != -1) {
- /* start from free slot found */
- i = k;
- } else {
- /*
- * event not found, no slot found in
- * first pass, try again from the
- * beginning
- */
- i = 0;
- }
- j = i;
- do {
- old = cmpxchg(nb->owners+i, NULL, event);
- if (!old)
+ if (old == event)
break;
- if (++i == max)
- i = 0;
- } while (i != j);
-done:
- if (!old)
- return &nb->event_constraints[i];
-
- return &emptyconstraint;
+ }
+
+ if (new == -1)
+ return &emptyconstraint;
+
+ if (amd_is_perfctr_nb_event(hwc))
+ amd_nb_interrupt_hw_config(hwc);
+
+ return &nb->event_constraints[new];
}
static struct amd_nb *amd_alloc_nb(int cpu)
@@ -364,7 +491,7 @@ static void amd_pmu_cpu_starting(int cpu)
struct amd_nb *nb;
int i, nb_id;
- cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
if (boot_cpu_data.x86_max_cores < 2)
return;
@@ -407,6 +534,26 @@ static void amd_pmu_cpu_dead(int cpu)
}
}
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ /*
+ * if not NB event or no NB, then no constraints
+ */
+ if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
+ return &unconstrained;
+
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
+ __amd_put_nb_event_constraints(cpuc, event);
+}
+
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -496,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
+static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
+
static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
{
@@ -561,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
return &amd_f15_PMC20;
}
case AMD_EVENT_NB:
- /* not yet implemented */
- return &emptyconstraint;
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
default:
return &emptyconstraint;
}
@@ -587,6 +737,8 @@ static __initconst const struct x86_pmu amd_pmu = {
.schedule_events = x86_schedule_events,
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
+ .addr_offset = amd_pmu_addr_offset,
+ .rdpmc_index = amd_pmu_rdpmc_index,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS,
@@ -608,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static int setup_event_constraints(void)
{
- if (boot_cpu_data.x86 >= 0x15)
+ if (boot_cpu_data.x86 == 0x15)
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
return 0;
}
@@ -638,6 +790,23 @@ static int setup_perfctr_core(void)
return 0;
}
+static int setup_perfctr_nb(void)
+{
+ if (!cpu_has_perfctr_nb)
+ return -ENODEV;
+
+ x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
+
+ if (cpu_has_perfctr_core)
+ amd_nb_event_constraint = &amd_NBPMC96;
+ else
+ amd_nb_event_constraint = &amd_NBPMC74;
+
+ printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
+
+ return 0;
+}
+
__init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
@@ -648,6 +817,7 @@ __init int amd_pmu_init(void)
setup_event_constraints();
setup_perfctr_core();
+ setup_perfctr_nb();
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
@@ -678,7 +848,7 @@ void amd_pmu_disable_virt(void)
* SVM is disabled the Guest-only bits still gets set and the counter
* will not count anything.
*/
- cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
/* Reload all events */
x86_pmu_disable_all();
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 6336bcbd061..5f0581e713c 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -528,7 +528,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
if (!test_bit(IBS_STARTED, pcpu->state)) {
/*
* Catch spurious interrupts after stopping IBS: After
- * disabling IBS there could be still incomming NMIs
+ * disabling IBS there could be still incoming NMIs
* with samples that even have the valid bit cleared.
* Mark all this NMIs as handled.
*/
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 93b9e1181f8..4914e94ad6e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2019,7 +2019,10 @@ __init int intel_pmu_init(void)
break;
case 28: /* Atom */
- case 54: /* Cedariew */
+ case 38: /* Lincroft */
+ case 39: /* Penwell */
+ case 53: /* Cloverview */
+ case 54: /* Cedarview */
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -2084,6 +2087,7 @@ __init int intel_pmu_init(void)
pr_cont("SandyBridge events, ");
break;
case 58: /* IvyBridge */
+ case 62: /* IvyBridge EP */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 3cf3d97cce3..b43200dbfe7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2500,7 +2500,7 @@ static bool pcidrv_registered;
/*
* add a pci uncore device
*/
-static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
+static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
{
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
@@ -2571,8 +2571,8 @@ static void uncore_pci_remove(struct pci_dev *pdev)
kfree(box);
}
-static int __devinit uncore_pci_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static int uncore_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
{
struct intel_uncore_type *type;
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index f2af39f5dc3..4820c232a0b 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -19,7 +19,7 @@ static const u64 p6_perfmon_event_map[] =
};
-static __initconst u64 p6_hw_cache_event_ids
+static u64 p6_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 3286a92e662..e280253f6f9 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -28,7 +28,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
{
seq_printf(m,
"fdiv_bug\t: %s\n"
- "hlt_bug\t\t: %s\n"
"f00f_bug\t: %s\n"
"coma_bug\t: %s\n"
"fpu\t\t: %s\n"
@@ -36,7 +35,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
"cpuid level\t: %d\n"
"wp\t\t: %s\n",
c->fdiv_bug ? "yes" : "no",
- c->hlt_works_ok ? "no" : "yes",
c->f00f_bug ? "yes" : "no",
c->coma_bug ? "yes" : "no",
c->hard_math ? "yes" : "no",
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index d22d0c4edcf..03a36321ec5 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -33,6 +33,9 @@
#define VMWARE_PORT_CMD_GETVERSION 10
#define VMWARE_PORT_CMD_GETHZ 45
+#define VMWARE_PORT_CMD_GETVCPU_INFO 68
+#define VMWARE_PORT_CMD_LEGACY_X2APIC 3
+#define VMWARE_PORT_CMD_VCPU_RESERVED 31
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
__asm__("inl (%%dx)" : \
@@ -125,10 +128,20 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
}
+/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
+static bool __init vmware_legacy_x2apic_available(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+ VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx);
+ return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 &&
+ (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
+}
+
const __refconst struct hypervisor_x86 x86_hyper_vmware = {
.name = "VMware",
.detect = vmware_platform,
.set_cpu_features = vmware_set_cpu_features,
.init_platform = vmware_platform_setup,
+ .x2apic_available = vmware_legacy_x2apic_available,
};
EXPORT_SYMBOL(x86_hyper_vmware);
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index ae42418bc50..c8797d55b24 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -232,7 +232,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
bust_spinlocks(0);
die_owner = -1;
- add_taint(TAINT_DIE);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
die_nest_count--;
if (!die_nest_count)
/* Nest count reaches zero, release the lock. */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index df06ade26be..d32abeabbda 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -835,7 +835,7 @@ static int __init parse_memopt(char *p)
}
early_param("mem", parse_memopt);
-static int __init parse_memmap_opt(char *p)
+static int __init parse_memmap_one(char *p)
{
char *oldp;
u64 start_at, mem_size;
@@ -877,6 +877,20 @@ static int __init parse_memmap_opt(char *p)
return *p == '\0' ? 0 : -EINVAL;
}
+static int __init parse_memmap_opt(char *str)
+{
+ while (str) {
+ char *k = strchr(str, ',');
+
+ if (k)
+ *k++ = 0;
+
+ parse_memmap_one(str);
+ str = k;
+ }
+
+ return 0;
+}
early_param("memmap", parse_memmap_opt);
void __init finish_e820_parsing(void)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index ff84d5469d7..8f3e2dec1df 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -699,51 +699,6 @@ END(syscall_badsys)
*/
.popsection
-/*
- * System calls that need a pt_regs pointer.
- */
-#define PTREGSCALL0(name) \
-ENTRY(ptregs_##name) ; \
- leal 4(%esp),%eax; \
- jmp sys_##name; \
-ENDPROC(ptregs_##name)
-
-#define PTREGSCALL1(name) \
-ENTRY(ptregs_##name) ; \
- leal 4(%esp),%edx; \
- movl (PT_EBX+4)(%esp),%eax; \
- jmp sys_##name; \
-ENDPROC(ptregs_##name)
-
-#define PTREGSCALL2(name) \
-ENTRY(ptregs_##name) ; \
- leal 4(%esp),%ecx; \
- movl (PT_ECX+4)(%esp),%edx; \
- movl (PT_EBX+4)(%esp),%eax; \
- jmp sys_##name; \
-ENDPROC(ptregs_##name)
-
-#define PTREGSCALL3(name) \
-ENTRY(ptregs_##name) ; \
- CFI_STARTPROC; \
- leal 4(%esp),%eax; \
- pushl_cfi %eax; \
- movl PT_EDX(%eax),%ecx; \
- movl PT_ECX(%eax),%edx; \
- movl PT_EBX(%eax),%eax; \
- call sys_##name; \
- addl $4,%esp; \
- CFI_ADJUST_CFA_OFFSET -4; \
- ret; \
- CFI_ENDPROC; \
-ENDPROC(ptregs_##name)
-
-PTREGSCALL1(iopl)
-PTREGSCALL0(sigreturn)
-PTREGSCALL0(rt_sigreturn)
-PTREGSCALL2(vm86)
-PTREGSCALL1(vm86old)
-
.macro FIXUP_ESPFIX_STACK
/*
* Switch back for ESPFIX stack to the normal zerobased stack
@@ -1065,7 +1020,6 @@ ENTRY(xen_failsafe_callback)
lea 16(%esp),%esp
CFI_ADJUST_CFA_OFFSET -16
jz 5f
- addl $16,%esp
jmp iret_exc
5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
@@ -1092,11 +1046,18 @@ ENTRY(xen_failsafe_callback)
_ASM_EXTABLE(4b,9b)
ENDPROC(xen_failsafe_callback)
-BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
+BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
xen_evtchn_do_upcall)
#endif /* CONFIG_XEN */
+#if IS_ENABLED(CONFIG_HYPERV)
+
+BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
+ hyperv_vector_handler)
+
+#endif /* CONFIG_HYPERV */
+
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 07a7a04529b..c1d01e6ca79 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -828,23 +828,6 @@ int_restore_rest:
CFI_ENDPROC
END(system_call)
-/*
- * Certain special system calls that need to save a complete full stack frame.
- */
- .macro PTREGSCALL label,func,arg
-ENTRY(\label)
- PARTIAL_FRAME 1 8 /* offset 8: return address */
- subq $REST_SKIP, %rsp
- CFI_ADJUST_CFA_OFFSET REST_SKIP
- call save_rest
- DEFAULT_FRAME 0 8 /* offset 8: return address */
- leaq 8(%rsp), \arg /* pt_regs pointer */
- call \func
- jmp ptregscall_common
- CFI_ENDPROC
-END(\label)
- .endm
-
.macro FORK_LIKE func
ENTRY(stub_\func)
CFI_STARTPROC
@@ -861,10 +844,22 @@ ENTRY(stub_\func)
END(stub_\func)
.endm
+ .macro FIXED_FRAME label,func
+ENTRY(\label)
+ CFI_STARTPROC
+ PARTIAL_FRAME 0 8 /* offset 8: return address */
+ FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
+ call \func
+ RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
+ ret
+ CFI_ENDPROC
+END(\label)
+ .endm
+
FORK_LIKE clone
FORK_LIKE fork
FORK_LIKE vfork
- PTREGSCALL stub_iopl, sys_iopl, %rsi
+ FIXED_FRAME stub_iopl, sys_iopl
ENTRY(ptregscall_common)
DEFAULT_FRAME 1 8 /* offset 8: return address */
@@ -886,7 +881,6 @@ ENTRY(stub_execve)
SAVE_REST
FIXUP_TOP_OF_STACK %r11
call sys_execve
- RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
@@ -902,7 +896,6 @@ ENTRY(stub_rt_sigreturn)
addq $8, %rsp
PARTIAL_FRAME 0
SAVE_REST
- movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
call sys_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
@@ -917,7 +910,6 @@ ENTRY(stub_x32_rt_sigreturn)
addq $8, %rsp
PARTIAL_FRAME 0
SAVE_REST
- movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
call sys32_x32_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
@@ -1454,11 +1446,16 @@ ENTRY(xen_failsafe_callback)
CFI_ENDPROC
END(xen_failsafe_callback)
-apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
+apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
xen_hvm_callback_vector xen_evtchn_do_upcall
#endif /* CONFIG_XEN */
+#if IS_ENABLED(CONFIG_HYPERV)
+apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
+ hyperv_callback_vector hyperv_vector_handler
+#endif /* CONFIG_HYPERV */
+
/*
* Some functions should be protected against kprobes
*/
@@ -1781,6 +1778,7 @@ first_nmi:
* Leave room for the "copied" frame
*/
subq $(5*8), %rsp
+ CFI_ADJUST_CFA_OFFSET 5*8
/* Copy the stack frame to the Saved frame */
.rept 5
@@ -1863,10 +1861,8 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
- RESTORE_ALL 8
-
- /* Pop the extra iret frame */
- addq $(5*8), %rsp
+ /* Pop the extra iret frame at once */
+ RESTORE_ALL 6*8
/* Clear the NMI executing stack variable */
movq $0, 5*8(%rsp)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d414029f1d..42a392a9fd0 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -89,7 +89,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
* kernel identity mapping to modify code.
*/
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
- ip = (unsigned long)__va(__pa(ip));
+ ip = (unsigned long)__va(__pa_symbol(ip));
return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
}
@@ -279,7 +279,7 @@ static int ftrace_write(unsigned long ip, const char *val, int size)
* kernel identity mapping to modify code.
*/
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
- ip = (unsigned long)__va(__pa(ip));
+ ip = (unsigned long)__va(__pa_symbol(ip));
return probe_kernel_write((void *)ip, val, size);
}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c18f59d1010..138463a2487 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,6 +18,7 @@
#include <asm/io_apic.h>
#include <asm/bios_ebda.h>
#include <asm/tlbflush.h>
+#include <asm/bootparam_utils.h>
static void __init i386_default_early_setup(void)
{
@@ -30,19 +31,7 @@ static void __init i386_default_early_setup(void)
void __init i386_start_kernel(void)
{
- memblock_reserve(__pa_symbol(&_text),
- __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
-
-#ifdef CONFIG_BLK_DEV_INITRD
- /* Reserve INITRD */
- if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
- /* Assume only end is not page aligned */
- u64 ramdisk_image = boot_params.hdr.ramdisk_image;
- u64 ramdisk_size = boot_params.hdr.ramdisk_size;
- u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
- memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
- }
-#endif
+ sanitize_boot_params(&boot_params);
/* Call the subarch specific early setup function */
switch (boot_params.hdr.hardware_subarch) {
@@ -57,11 +46,5 @@ void __init i386_start_kernel(void)
break;
}
- /*
- * At this point everything still needed from the boot loader
- * or BIOS or kernel text should be early reserved or marked not
- * RAM in e820. All other memory is free game.
- */
-
start_kernel();
}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 037df57a99a..c5e403f6d86 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,12 +25,84 @@
#include <asm/kdebug.h>
#include <asm/e820.h>
#include <asm/bios_ebda.h>
+#include <asm/bootparam_utils.h>
+#include <asm/microcode.h>
-static void __init zap_identity_mappings(void)
+/*
+ * Manage page tables very early on.
+ */
+extern pgd_t early_level4_pgt[PTRS_PER_PGD];
+extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
+static unsigned int __initdata next_early_pgt = 2;
+
+/* Wipe all early page tables except for the kernel symbol map */
+static void __init reset_early_page_tables(void)
{
- pgd_t *pgd = pgd_offset_k(0UL);
- pgd_clear(pgd);
- __flush_tlb_all();
+ unsigned long i;
+
+ for (i = 0; i < PTRS_PER_PGD-1; i++)
+ early_level4_pgt[i].pgd = 0;
+
+ next_early_pgt = 0;
+
+ write_cr3(__pa(early_level4_pgt));
+}
+
+/* Create a new PMD entry */
+int __init early_make_pgtable(unsigned long address)
+{
+ unsigned long physaddr = address - __PAGE_OFFSET;
+ unsigned long i;
+ pgdval_t pgd, *pgd_p;
+ pudval_t pud, *pud_p;
+ pmdval_t pmd, *pmd_p;
+
+ /* Invalid address or early pgt is done ? */
+ if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt))
+ return -1;
+
+again:
+ pgd_p = &early_level4_pgt[pgd_index(address)].pgd;
+ pgd = *pgd_p;
+
+ /*
+ * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
+ * critical -- __PAGE_OFFSET would point us back into the dynamic
+ * range and we might end up looping forever...
+ */
+ if (pgd)
+ pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+ else {
+ if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
+ reset_early_page_tables();
+ goto again;
+ }
+
+ pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
+ for (i = 0; i < PTRS_PER_PUD; i++)
+ pud_p[i] = 0;
+ *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+ }
+ pud_p += pud_index(address);
+ pud = *pud_p;
+
+ if (pud)
+ pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+ else {
+ if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
+ reset_early_page_tables();
+ goto again;
+ }
+
+ pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ pmd_p[i] = 0;
+ *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+ }
+ pmd = (physaddr & PMD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL);
+ pmd_p[pmd_index(address)] = pmd;
+
+ return 0;
}
/* Don't add a printk in there. printk relies on the PDA which is not initialized
@@ -41,13 +113,25 @@ static void __init clear_bss(void)
(unsigned long) __bss_stop - (unsigned long) __bss_start);
}
+static unsigned long get_cmd_line_ptr(void)
+{
+ unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+ cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;
+
+ return cmd_line_ptr;
+}
+
static void __init copy_bootdata(char *real_mode_data)
{
char * command_line;
+ unsigned long cmd_line_ptr;
memcpy(&boot_params, real_mode_data, sizeof boot_params);
- if (boot_params.hdr.cmd_line_ptr) {
- command_line = __va(boot_params.hdr.cmd_line_ptr);
+ sanitize_boot_params(&boot_params);
+ cmd_line_ptr = get_cmd_line_ptr();
+ if (cmd_line_ptr) {
+ command_line = __va(cmd_line_ptr);
memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
}
}
@@ -70,54 +154,40 @@ void __init x86_64_start_kernel(char * real_mode_data)
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
+ /* Kill off the identity-map trampoline */
+ reset_early_page_tables();
+
/* clear bss before set_intr_gate with early_idt_handler */
clear_bss();
- /* Make NULL pointers segfault */
- zap_identity_mappings();
-
- max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
-
- for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
-#ifdef CONFIG_EARLY_PRINTK
+ for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
set_intr_gate(i, &early_idt_handlers[i]);
-#else
- set_intr_gate(i, early_idt_handler);
-#endif
- }
load_idt((const struct desc_ptr *)&idt_descr);
+ copy_bootdata(__va(real_mode_data));
+
+ /*
+ * Load microcode early on BSP.
+ */
+ load_ucode_bsp();
+
if (console_loglevel == 10)
early_printk("Kernel alive\n");
+ clear_page(init_level4_pgt);
+ /* set init_level4_pgt kernel high mapping*/
+ init_level4_pgt[511] = early_level4_pgt[511];
+
x86_64_start_reservations(real_mode_data);
}
void __init x86_64_start_reservations(char *real_mode_data)
{
- copy_bootdata(__va(real_mode_data));
-
- memblock_reserve(__pa_symbol(&_text),
- __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
-
-#ifdef CONFIG_BLK_DEV_INITRD
- /* Reserve INITRD */
- if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
- /* Assume only end is not page aligned */
- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
- unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
- memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
- }
-#endif
+ /* version is always not zero if it is copied */
+ if (!boot_params.hdr.version)
+ copy_bootdata(__va(real_mode_data));
reserve_ebda_region();
- /*
- * At this point everything still needed from the boot loader
- * or BIOS or kernel text should be early reserved or marked not
- * RAM in e820. All other memory is free game.
- */
-
start_kernel();
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 8e7f6556028..73afd11799c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -144,6 +144,11 @@ ENTRY(startup_32)
movl %eax, pa(olpc_ofw_pgd)
#endif
+#ifdef CONFIG_MICROCODE_EARLY
+ /* Early load ucode on BSP. */
+ call load_ucode_bsp
+#endif
+
/*
* Initialize page tables. This creates a PDE and a set of page
* tables, which are located immediately beyond __brk_base. The variable
@@ -299,38 +304,59 @@ ENTRY(startup_32_smp)
movl %eax,%ss
leal -__PAGE_OFFSET(%ecx),%esp
+#ifdef CONFIG_MICROCODE_EARLY
+ /* Early load ucode on AP. */
+ call load_ucode_ap
+#endif
+
+
default_entry:
+#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
+ X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
+ X86_CR0_PG)
+ movl $(CR0_STATE & ~X86_CR0_PG),%eax
+ movl %eax,%cr0
+
/*
- * New page tables may be in 4Mbyte page mode and may
- * be using the global pages.
+ * We want to start out with EFLAGS unambiguously cleared. Some BIOSes leave
+ * bits like NT set. This would confuse the debugger if this code is traced. So
+ * initialize them properly now before switching to protected mode. That means
+ * DF in particular (even though we have cleared it earlier after copying the
+ * command line) because GCC expects it.
+ */
+ pushl $0
+ popfl
+
+/*
+ * New page tables may be in 4Mbyte page mode and may be using the global pages.
*
- * NOTE! If we are on a 486 we may have no cr4 at all!
- * Specifically, cr4 exists if and only if CPUID exists
- * and has flags other than the FPU flag set.
+ * NOTE! If we are on a 486 we may have no cr4 at all! Specifically, cr4 exists
+ * if and only if CPUID exists and has flags other than the FPU flag set.
*/
+ movl $-1,pa(X86_CPUID) # preset CPUID level
movl $X86_EFLAGS_ID,%ecx
pushl %ecx
- popfl
- pushfl
- popl %eax
- pushl $0
- popfl
+ popfl # set EFLAGS=ID
pushfl
- popl %edx
- xorl %edx,%eax
- testl %ecx,%eax
- jz 6f # No ID flag = no CPUID = no CR4
+ popl %eax # get EFLAGS
+ testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remained set?
+ jz enable_paging # hw disallowed setting of ID bit
+ # which means no CPUID and no CR4
+
+ xorl %eax,%eax
+ cpuid
+ movl %eax,pa(X86_CPUID) # save largest std CPUID function
movl $1,%eax
cpuid
- andl $~1,%edx # Ignore CPUID.FPU
- jz 6f # No flags or only CPUID.FPU = no CR4
+ andl $~1,%edx # Ignore CPUID.FPU
+ jz enable_paging # No flags or only CPUID.FPU = no CR4
movl pa(mmu_cr4_features),%eax
movl %eax,%cr4
testb $X86_CR4_PAE, %al # check if PAE is enabled
- jz 6f
+ jz enable_paging
/* Check if extended functions are implemented */
movl $0x80000000, %eax
@@ -338,7 +364,7 @@ default_entry:
/* Value must be in the range 0x80000001 to 0x8000ffff */
subl $0x80000001, %eax
cmpl $(0x8000ffff-0x80000001), %eax
- ja 6f
+ ja enable_paging
/* Clear bogus XD_DISABLE bits */
call verify_cpu
@@ -347,7 +373,7 @@ default_entry:
cpuid
/* Execute Disable bit supported? */
btl $(X86_FEATURE_NX & 31), %edx
- jnc 6f
+ jnc enable_paging
/* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
@@ -357,15 +383,14 @@ default_entry:
/* Make changes effective */
wrmsr
-6:
+enable_paging:
/*
* Enable paging
*/
movl $pa(initial_page_table), %eax
movl %eax,%cr3 /* set the page table pointer.. */
- movl %cr0,%eax
- orl $X86_CR0_PG,%eax
+ movl $CR0_STATE,%eax
movl %eax,%cr0 /* ..and set paging (PG) bit */
ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
1:
@@ -373,14 +398,6 @@ default_entry:
addl $__PAGE_OFFSET, %esp
/*
- * Initialize eflags. Some BIOS's leave bits like NT set. This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
- */
- pushl $0
- popfl
-
-/*
* start system 32-bit setup. We need to re-do some of the things done
* in 16-bit mode for the "real" operations.
*/
@@ -389,31 +406,11 @@ default_entry:
jz 1f # Did we do this already?
call *%eax
1:
-
-/* check if it is 486 or 386. */
+
/*
- * XXX - this does a lot of unnecessary setup. Alignment checks don't
- * apply at our cpl of 0 and the stack ought to be aligned already, and
- * we don't need to preserve eflags.
+ * Check if it is 486
*/
- movl $-1,X86_CPUID # -1 for no CPUID initially
- movb $3,X86 # at least 386
- pushfl # push EFLAGS
- popl %eax # get EFLAGS
- movl %eax,%ecx # save original EFLAGS
- xorl $0x240000,%eax # flip AC and ID bits in EFLAGS
- pushl %eax # copy to EFLAGS
- popfl # set EFLAGS
- pushfl # get new EFLAGS
- popl %eax # put it in eax
- xorl %ecx,%eax # change in flags
- pushl %ecx # restore original EFLAGS
- popfl
- testl $0x40000,%eax # check if AC bit changed
- je is386
-
- movb $4,X86 # at least 486
- testl $0x200000,%eax # check if ID bit changed
+ cmpl $-1,X86_CPUID
je is486
/* get vendor info */
@@ -439,11 +436,10 @@ default_entry:
movb %cl,X86_MASK
movl %edx,X86_CAPABILITY
-is486: movl $0x50022,%ecx # set AM, WP, NE and MP
- jmp 2f
-
-is386: movl $2,%ecx # set MP
-2: movl %cr0,%eax
+is486:
+ movb $4,X86
+ movl $0x50022,%ecx # set AM, WP, NE and MP
+ movl %cr0,%eax
andl $0x80000011,%eax # Save PG,PE,ET
orl %ecx,%eax
movl %eax,%cr0
@@ -468,7 +464,6 @@ is386: movl $2,%ecx # set MP
xorl %eax,%eax # Clear LDT
lldt %ax
- cld # gcc2 wants the direction flag cleared at all times
pushl $0 # fake return address for unwinder
jmp *(initial_code)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 980053c4b9c..b7de3b25adb 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -47,14 +47,13 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
.code64
.globl startup_64
startup_64:
-
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
* and someone has loaded an identity mapped page table
* for us. These identity mapped page tables map all of the
* kernel pages and possibly all of memory.
*
- * %esi holds a physical pointer to real_mode_data.
+ * %rsi holds a physical pointer to real_mode_data.
*
* We come here either directly from a 64bit bootloader, or from
* arch/x86_64/boot/compressed/head.S.
@@ -66,7 +65,8 @@ startup_64:
* tables and then reload them.
*/
- /* Compute the delta between the address I am compiled to run at and the
+ /*
+ * Compute the delta between the address I am compiled to run at and the
* address I am actually running at.
*/
leaq _text(%rip), %rbp
@@ -78,45 +78,62 @@ startup_64:
testl %eax, %eax
jnz bad_address
- /* Is the address too large? */
- leaq _text(%rip), %rdx
- movq $PGDIR_SIZE, %rax
- cmpq %rax, %rdx
- jae bad_address
-
- /* Fixup the physical addresses in the page table
+ /*
+ * Is the address too large?
*/
- addq %rbp, init_level4_pgt + 0(%rip)
- addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip)
- addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip)
+ leaq _text(%rip), %rax
+ shrq $MAX_PHYSMEM_BITS, %rax
+ jnz bad_address
- addq %rbp, level3_ident_pgt + 0(%rip)
+ /*
+ * Fixup the physical addresses in the page table
+ */
+ addq %rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip)
addq %rbp, level3_kernel_pgt + (510*8)(%rip)
addq %rbp, level3_kernel_pgt + (511*8)(%rip)
addq %rbp, level2_fixmap_pgt + (506*8)(%rip)
- /* Add an Identity mapping if I am above 1G */
+ /*
+ * Set up the identity mapping for the switchover. These
+ * entries should *NOT* have the global bit set! This also
+ * creates a bunch of nonsense entries but that is fine --
+ * it avoids problems around wraparound.
+ */
leaq _text(%rip), %rdi
- andq $PMD_PAGE_MASK, %rdi
+ leaq early_level4_pgt(%rip), %rbx
movq %rdi, %rax
- shrq $PUD_SHIFT, %rax
- andq $(PTRS_PER_PUD - 1), %rax
- jz ident_complete
+ shrq $PGDIR_SHIFT, %rax
- leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
- leaq level3_ident_pgt(%rip), %rbx
- movq %rdx, 0(%rbx, %rax, 8)
+ leaq (4096 + _KERNPG_TABLE)(%rbx), %rdx
+ movq %rdx, 0(%rbx,%rax,8)
+ movq %rdx, 8(%rbx,%rax,8)
+ addq $4096, %rdx
movq %rdi, %rax
- shrq $PMD_SHIFT, %rax
- andq $(PTRS_PER_PMD - 1), %rax
- leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx
- leaq level2_spare_pgt(%rip), %rbx
- movq %rdx, 0(%rbx, %rax, 8)
-ident_complete:
+ shrq $PUD_SHIFT, %rax
+ andl $(PTRS_PER_PUD-1), %eax
+ movq %rdx, (4096+0)(%rbx,%rax,8)
+ movq %rdx, (4096+8)(%rbx,%rax,8)
+
+ addq $8192, %rbx
+ movq %rdi, %rax
+ shrq $PMD_SHIFT, %rdi
+ addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
+ leaq (_end - 1)(%rip), %rcx
+ shrq $PMD_SHIFT, %rcx
+ subq %rdi, %rcx
+ incl %ecx
+
+1:
+ andq $(PTRS_PER_PMD - 1), %rdi
+ movq %rax, (%rbx,%rdi,8)
+ incq %rdi
+ addq $PMD_SIZE, %rax
+ decl %ecx
+ jnz 1b
/*
* Fixup the kernel text+data virtual addresses. Note that
@@ -124,7 +141,6 @@ ident_complete:
* cleanup_highmap() fixes this up along with the mappings
* beyond _end.
*/
-
leaq level2_kernel_pgt(%rip), %rdi
leaq 4096(%rdi), %r8
/* See if it is a valid page table entry */
@@ -139,17 +155,14 @@ ident_complete:
/* Fixup phys_base */
addq %rbp, phys_base(%rip)
- /* Due to ENTRY(), sometimes the empty space gets filled with
- * zeros. Better take a jmp than relying on empty space being
- * filled with 0x90 (nop)
- */
- jmp secondary_startup_64
+ movq $(early_level4_pgt - __START_KERNEL_map), %rax
+ jmp 1f
ENTRY(secondary_startup_64)
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
* and someone has loaded a mapped page table.
*
- * %esi holds a physical pointer to real_mode_data.
+ * %rsi holds a physical pointer to real_mode_data.
*
* We come here either from startup_64 (using physical addresses)
* or from trampoline.S (using virtual addresses).
@@ -159,12 +172,14 @@ ENTRY(secondary_startup_64)
* after the boot processor executes this code.
*/
+ movq $(init_level4_pgt - __START_KERNEL_map), %rax
+1:
+
/* Enable PAE mode and PGE */
- movl $(X86_CR4_PAE | X86_CR4_PGE), %eax
- movq %rax, %cr4
+ movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
+ movq %rcx, %cr4
/* Setup early boot stage 4 level pagetables. */
- movq $(init_level4_pgt - __START_KERNEL_map), %rax
addq phys_base(%rip), %rax
movq %rax, %cr3
@@ -196,7 +211,7 @@ ENTRY(secondary_startup_64)
movq %rax, %cr0
/* Setup a boot time stack */
- movq stack_start(%rip),%rsp
+ movq stack_start(%rip), %rsp
/* zero EFLAGS after setting rsp */
pushq $0
@@ -236,15 +251,33 @@ ENTRY(secondary_startup_64)
movl initial_gs+4(%rip),%edx
wrmsr
- /* esi is pointer to real mode structure with interesting info.
+ /* rsi is pointer to real mode structure with interesting info.
pass it to C */
- movl %esi, %edi
+ movq %rsi, %rdi
/* Finally jump to run C code and to be on real kernel address
* Since we are running on identity-mapped space we have to jump
* to the full 64bit address, this is only possible as indirect
* jump. In addition we need to ensure %cs is set so we make this
* a far return.
+ *
+ * Note: do not change to far jump indirect with 64bit offset.
+ *
+ * AMD does not support far jump indirect with 64bit offset.
+ * AMD64 Architecture Programmer's Manual, Volume 3: states only
+ * JMP FAR mem16:16 FF /5 Far jump indirect,
+ * with the target specified by a far pointer in memory.
+ * JMP FAR mem16:32 FF /5 Far jump indirect,
+ * with the target specified by a far pointer in memory.
+ *
+ * Intel64 does support 64bit offset.
+ * Software Developer Manual Vol 2: states:
+ * FF /5 JMP m16:16 Jump far, absolute indirect,
+ * address given in m16:16
+ * FF /5 JMP m16:32 Jump far, absolute indirect,
+ * address given in m16:32.
+ * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
+ * address given in m16:64.
*/
movq initial_code(%rip),%rax
pushq $0 # fake return address to stop unwinder
@@ -270,13 +303,13 @@ ENDPROC(start_cpu0)
/* SMP bootup changes these two */
__REFDATA
- .align 8
- ENTRY(initial_code)
+ .balign 8
+ GLOBAL(initial_code)
.quad x86_64_start_kernel
- ENTRY(initial_gs)
+ GLOBAL(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union)
- ENTRY(stack_start)
+ GLOBAL(stack_start)
.quad init_thread_union+THREAD_SIZE-8
.word 0
__FINITDATA
@@ -284,7 +317,7 @@ ENDPROC(start_cpu0)
bad_address:
jmp bad_address
- .section ".init.text","ax"
+ __INIT
.globl early_idt_handlers
early_idt_handlers:
# 104(%rsp) %rflags
@@ -303,6 +336,7 @@ early_idt_handlers:
i = i + 1
.endr
+/* This is global to keep gas from relaxing the jumps */
ENTRY(early_idt_handler)
cld
@@ -321,14 +355,22 @@ ENTRY(early_idt_handler)
pushq %r11 # 0(%rsp)
cmpl $__KERNEL_CS,96(%rsp)
- jne 10f
+ jne 11f
+
+ cmpl $14,72(%rsp) # Page fault?
+ jnz 10f
+ GET_CR2_INTO(%rdi) # can clobber any volatile register if pv
+ call early_make_pgtable
+ andl %eax,%eax
+ jz 20f # All good
+10:
leaq 88(%rsp),%rdi # Pointer to %rip
call early_fixup_exception
andl %eax,%eax
jnz 20f # Found an exception entry
-10:
+11:
#ifdef CONFIG_EARLY_PRINTK
GET_CR2_INTO(%r9) # can clobber any volatile register if pv
movl 80(%rsp),%r8d # error code
@@ -350,7 +392,7 @@ ENTRY(early_idt_handler)
1: hlt
jmp 1b
-20: # Exception table entry found
+20: # Exception table entry found or page table generated
popq %r11
popq %r10
popq %r9
@@ -363,6 +405,9 @@ ENTRY(early_idt_handler)
addq $16,%rsp # drop vector number and error code
decl early_recursion_flag(%rip)
INTERRUPT_RETURN
+ENDPROC(early_idt_handler)
+
+ __INITDATA
.balign 4
early_recursion_flag:
@@ -374,11 +419,10 @@ early_idt_msg:
early_idt_ripmsg:
.asciz "RIP %s\n"
#endif /* CONFIG_EARLY_PRINTK */
- .previous
#define NEXT_PAGE(name) \
.balign PAGE_SIZE; \
-ENTRY(name)
+GLOBAL(name)
/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \
@@ -388,24 +432,37 @@ ENTRY(name)
i = i + 1 ; \
.endr
+ __INITDATA
+NEXT_PAGE(early_level4_pgt)
+ .fill 511,8,0
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+
+NEXT_PAGE(early_dynamic_pgts)
+ .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
+
.data
- /*
- * This default setting generates an ident mapping at address 0x100000
- * and a mapping for the kernel that precisely maps virtual address
- * 0xffffffff80000000 to physical address 0x000000. (always using
- * 2Mbyte large pages provided by PAE mode)
- */
+
+#ifndef CONFIG_XEN
NEXT_PAGE(init_level4_pgt)
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .org init_level4_pgt + L4_PAGE_OFFSET*8, 0
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .org init_level4_pgt + L4_START_KERNEL*8, 0
+ .fill 512,8,0
+#else
+NEXT_PAGE(init_level4_pgt)
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .org init_level4_pgt + L4_PAGE_OFFSET*8, 0
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .org init_level4_pgt + L4_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
NEXT_PAGE(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .fill 511,8,0
+ .fill 511, 8, 0
+NEXT_PAGE(level2_ident_pgt)
+ /* Since I easily can, map the first 1G.
+ * Don't set NX because code runs from these pages.
+ */
+ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+#endif
NEXT_PAGE(level3_kernel_pgt)
.fill L3_START_KERNEL,8,0
@@ -413,21 +470,6 @@ NEXT_PAGE(level3_kernel_pgt)
.quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
.quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
-NEXT_PAGE(level2_fixmap_pgt)
- .fill 506,8,0
- .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
- /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
- .fill 5,8,0
-
-NEXT_PAGE(level1_fixmap_pgt)
- .fill 512,8,0
-
-NEXT_PAGE(level2_ident_pgt)
- /* Since I easily can, map the first 1G.
- * Don't set NX because code runs from these pages.
- */
- PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
-
NEXT_PAGE(level2_kernel_pgt)
/*
* 512 MB kernel mapping. We spend a full page on this pagetable
@@ -442,11 +484,16 @@ NEXT_PAGE(level2_kernel_pgt)
PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
KERNEL_IMAGE_SIZE/PMD_SIZE)
-NEXT_PAGE(level2_spare_pgt)
- .fill 512, 8, 0
+NEXT_PAGE(level2_fixmap_pgt)
+ .fill 506,8,0
+ .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
+ /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
+ .fill 5,8,0
+
+NEXT_PAGE(level1_fixmap_pgt)
+ .fill 512,8,0
#undef PMDS
-#undef NEXT_PAGE
.data
.align 16
@@ -472,6 +519,5 @@ ENTRY(nmi_idt_table)
.skip IDT_ENTRIES * 16
__PAGE_ALIGNED_BSS
- .align PAGE_SIZE
-ENTRY(empty_zero_page)
+NEXT_PAGE(empty_zero_page)
.skip PAGE_SIZE
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index e28670f9a58..da85a8e830a 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta,
static int hpet_setup_msi_irq(unsigned int irq)
{
- if (arch_setup_hpet_msi(irq, hpet_blockid)) {
+ if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) {
destroy_irq(irq);
return -EINVAL;
}
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 9c3bd4a2050..0fa69127209 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -26,6 +26,7 @@ EXPORT_SYMBOL(csum_partial_copy_generic);
EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__get_user_8);
EXPORT_SYMBOL(__put_user_1);
EXPORT_SYMBOL(__put_user_2);
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 8c968974253..4ddaf66ea35 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -93,8 +93,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
* on system-call entry - see also fork() and the signal handling
* code.
*/
-long sys_iopl(unsigned int level, struct pt_regs *regs)
+SYSCALL_DEFINE1(iopl, unsigned int, level)
{
+ struct pt_regs *regs = current_pt_regs();
unsigned int old = (regs->flags >> 12) & 3;
struct thread_struct *t = &current->thread;
diff --git a/arch/x86/kernel/kprobes/Makefile b/arch/x86/kernel/kprobes/Makefile
new file mode 100644
index 00000000000..0d33169cc1a
--- /dev/null
+++ b/arch/x86/kernel/kprobes/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for kernel probes
+#
+
+obj-$(CONFIG_KPROBES) += core.o
+obj-$(CONFIG_OPTPROBES) += opt.o
+obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes/common.h
index 3230b68ef29..2e9d4b5af03 100644
--- a/arch/x86/kernel/kprobes-common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
return addr;
}
#endif
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb);
+#else
+static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ return 0;
+}
+#endif
#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes/core.c
index 57916c0d3cf..e124554598e 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -58,7 +58,7 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
-#include "kprobes-common.h"
+#include "common.h"
void jprobe_return_end(void);
@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
* Groups, and some special opcodes can not boost.
* This is non-const and volatile to keep gcc from statically
* optimizing it out, as variable_test_bit makes gcc think only
- * *(unsigned long*) is used.
+ * *(unsigned long*) is used.
*/
static volatile u32 twobyte_is_boostable[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
struct __arch_relative_insn {
u8 op;
s32 raddr;
- } __attribute__((packed)) *insn;
+ } __packed *insn;
insn = (struct __arch_relative_insn *)from;
insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
return 1;
}
-#ifdef KPROBES_CAN_USE_FTRACE
-static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- /*
- * Emulate singlestep (and also recover regs->ip)
- * as if there is a 5byte nop
- */
- regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
- if (unlikely(p->post_handler)) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- p->post_handler(p, regs, 0);
- }
- __this_cpu_write(current_kprobe, NULL);
-}
-#endif
-
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
* remain disabled throughout this function.
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
} else if (kprobe_running()) {
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
-#ifdef KPROBES_CAN_USE_FTRACE
- if (kprobe_ftrace(p)) {
- skip_singlestep(p, regs, kcb);
- return 1;
- }
-#endif
- setup_singlestep(p, regs, kcb, 0);
+ if (!skip_singlestep(p, regs, kcb))
+ setup_singlestep(p, regs, kcb, 0);
return 1;
}
} /* else: not a kprobe fault; let the kernel handle it */
@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-#ifdef KPROBES_CAN_USE_FTRACE
-/* Ftrace callback handler for kprobes */
-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct pt_regs *regs)
-{
- struct kprobe *p;
- struct kprobe_ctlblk *kcb;
- unsigned long flags;
-
- /* Disable irq for emulating a breakpoint and avoiding preempt */
- local_irq_save(flags);
-
- p = get_kprobe((kprobe_opcode_t *)ip);
- if (unlikely(!p) || kprobe_disabled(p))
- goto end;
-
- kcb = get_kprobe_ctlblk();
- if (kprobe_running()) {
- kprobes_inc_nmissed_count(p);
- } else {
- /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
- regs->ip = ip + sizeof(kprobe_opcode_t);
-
- __this_cpu_write(current_kprobe, p);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (!p->pre_handler || !p->pre_handler(p, regs))
- skip_singlestep(p, regs, kcb);
- /*
- * If pre_handler returns !0, it sets regs->ip and
- * resets current kprobe.
- */
- }
-end:
- local_irq_restore(flags);
-}
-
-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
-{
- p->ainsn.insn = NULL;
- p->ainsn.boostable = -1;
- return 0;
-}
-#endif
-
int __init arch_init_kprobes(void)
{
return arch_init_optprobes();
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
new file mode 100644
index 00000000000..23ef5c556f0
--- /dev/null
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -0,0 +1,93 @@
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi Ltd., 2012
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/ftrace.h>
+
+#include "common.h"
+
+static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ /*
+ * Emulate singlestep (and also recover regs->ip)
+ * as if there is a 5byte nop
+ */
+ regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ __this_cpu_write(current_kprobe, NULL);
+ return 1;
+}
+
+int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ if (kprobe_ftrace(p))
+ return __skip_singlestep(p, regs, kcb);
+ else
+ return 0;
+}
+
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct pt_regs *regs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+ unsigned long flags;
+
+ /* Disable irq for emulating a breakpoint and avoiding preempt */
+ local_irq_save(flags);
+
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto end;
+
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
+ regs->ip = ip + sizeof(kprobe_opcode_t);
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ __skip_singlestep(p, regs, kcb);
+ /*
+ * If pre_handler returns !0, it sets regs->ip and
+ * resets current kprobe.
+ */
+ }
+end:
+ local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ p->ainsn.boostable = -1;
+ return 0;
+}
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes/opt.c
index c5e410eed40..76dc6f09572 100644
--- a/arch/x86/kernel/kprobes-opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,7 +37,7 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
-#include "kprobes-common.h"
+#include "common.h"
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 08b973f6403..b686a904d7c 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -43,6 +43,7 @@
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/kvm_guest.h>
+#include <asm/context_tracking.h>
static int kvmapf = 1;
@@ -121,6 +122,8 @@ void kvm_async_pf_task_wait(u32 token)
struct kvm_task_sleep_node n, *e;
DEFINE_WAIT(wait);
+ rcu_irq_enter();
+
spin_lock(&b->lock);
e = _find_apf_task(b, token);
if (e) {
@@ -128,6 +131,8 @@ void kvm_async_pf_task_wait(u32 token)
hlist_del(&e->link);
kfree(e);
spin_unlock(&b->lock);
+
+ rcu_irq_exit();
return;
}
@@ -152,13 +157,16 @@ void kvm_async_pf_task_wait(u32 token)
/*
* We cannot reschedule. So halt.
*/
+ rcu_irq_exit();
native_safe_halt();
+ rcu_irq_enter();
local_irq_disable();
}
}
if (!n.halted)
finish_wait(&n.wq, &wait);
+ rcu_irq_exit();
return;
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
@@ -252,10 +260,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
- rcu_irq_enter();
+ exception_enter(regs);
exit_idle();
kvm_async_pf_task_wait((u32)read_cr2());
- rcu_irq_exit();
+ exception_exit(regs);
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
@@ -289,9 +297,9 @@ static void kvm_register_steal_time(void)
memset(st, 0, sizeof(*st));
- wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED));
- printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n",
- cpu, __pa(st));
+ wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
+ pr_info("kvm-stealtime: cpu %d, msr %llx\n",
+ cpu, (unsigned long long) slow_virt_to_phys(st));
}
static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
@@ -316,7 +324,7 @@ void __cpuinit kvm_guest_cpu_init(void)
return;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
- u64 pa = __pa(&__get_cpu_var(apf_reason));
+ u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason));
#ifdef CONFIG_PREEMPT
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
@@ -332,7 +340,8 @@ void __cpuinit kvm_guest_cpu_init(void)
/* Size alignment is implied but just to make it explicit. */
BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
__get_cpu_var(kvm_apic_eoi) = 0;
- pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED;
+ pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi))
+ | KVM_MSR_ENABLED;
wrmsrl(MSR_KVM_PV_EOI_EN, pa);
}
@@ -497,6 +506,7 @@ static bool __init kvm_detect(void)
const struct hypervisor_x86 x86_hyper_kvm __refconst = {
.name = "KVM",
.detect = kvm_detect,
+ .x2apic_available = kvm_para_available,
};
EXPORT_SYMBOL_GPL(x86_hyper_kvm);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 220a360010f..0732f0089a3 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -162,8 +162,8 @@ int kvm_register_clock(char *txt)
int low, high, ret;
struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
- low = (int)__pa(src) | 1;
- high = ((u64)__pa(src) >> 32);
+ low = (int)slow_virt_to_phys(src) | 1;
+ high = ((u64)slow_virt_to_phys(src) >> 32);
ret = native_write_msr_safe(msr_kvm_system_time, low, high);
printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
cpu, high, low, txt);
@@ -218,6 +218,9 @@ static void kvm_shutdown(void)
void __init kvmclock_init(void)
{
unsigned long mem;
+ int size;
+
+ size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
if (!kvm_para_available())
return;
@@ -231,16 +234,14 @@ void __init kvmclock_init(void)
printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
msr_kvm_system_time, msr_kvm_wall_clock);
- mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS,
- PAGE_SIZE);
+ mem = memblock_alloc(size, PAGE_SIZE);
if (!mem)
return;
hv_clock = __va(mem);
if (kvm_register_clock("boot clock")) {
hv_clock = NULL;
- memblock_free(mem,
- sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
+ memblock_free(mem, size);
return;
}
pv_time_ops.sched_clock = kvm_clock_read;
@@ -275,7 +276,7 @@ int __init kvm_setup_vsyscall_timeinfo(void)
struct pvclock_vcpu_time_info *vcpu_time;
unsigned int size;
- size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS;
+ size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
preempt_disable();
cpu = smp_processor_id();
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index b3ea9db39db..4eabc160696 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -16,125 +16,12 @@
#include <linux/io.h>
#include <linux/suspend.h>
+#include <asm/init.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/debugreg.h>
-static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
- unsigned long addr)
-{
- pud_t *pud;
- pmd_t *pmd;
- struct page *page;
- int result = -ENOMEM;
-
- addr &= PMD_MASK;
- pgd += pgd_index(addr);
- if (!pgd_present(*pgd)) {
- page = kimage_alloc_control_pages(image, 0);
- if (!page)
- goto out;
- pud = (pud_t *)page_address(page);
- clear_page(pud);
- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
- }
- pud = pud_offset(pgd, addr);
- if (!pud_present(*pud)) {
- page = kimage_alloc_control_pages(image, 0);
- if (!page)
- goto out;
- pmd = (pmd_t *)page_address(page);
- clear_page(pmd);
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
- }
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd))
- set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
- result = 0;
-out:
- return result;
-}
-
-static void init_level2_page(pmd_t *level2p, unsigned long addr)
-{
- unsigned long end_addr;
-
- addr &= PAGE_MASK;
- end_addr = addr + PUD_SIZE;
- while (addr < end_addr) {
- set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
- addr += PMD_SIZE;
- }
-}
-
-static int init_level3_page(struct kimage *image, pud_t *level3p,
- unsigned long addr, unsigned long last_addr)
-{
- unsigned long end_addr;
- int result;
-
- result = 0;
- addr &= PAGE_MASK;
- end_addr = addr + PGDIR_SIZE;
- while ((addr < last_addr) && (addr < end_addr)) {
- struct page *page;
- pmd_t *level2p;
-
- page = kimage_alloc_control_pages(image, 0);
- if (!page) {
- result = -ENOMEM;
- goto out;
- }
- level2p = (pmd_t *)page_address(page);
- init_level2_page(level2p, addr);
- set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
- addr += PUD_SIZE;
- }
- /* clear the unused entries */
- while (addr < end_addr) {
- pud_clear(level3p++);
- addr += PUD_SIZE;
- }
-out:
- return result;
-}
-
-
-static int init_level4_page(struct kimage *image, pgd_t *level4p,
- unsigned long addr, unsigned long last_addr)
-{
- unsigned long end_addr;
- int result;
-
- result = 0;
- addr &= PAGE_MASK;
- end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
- while ((addr < last_addr) && (addr < end_addr)) {
- struct page *page;
- pud_t *level3p;
-
- page = kimage_alloc_control_pages(image, 0);
- if (!page) {
- result = -ENOMEM;
- goto out;
- }
- level3p = (pud_t *)page_address(page);
- result = init_level3_page(image, level3p, addr, last_addr);
- if (result)
- goto out;
- set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
- addr += PGDIR_SIZE;
- }
- /* clear the unused entries */
- while (addr < end_addr) {
- pgd_clear(level4p++);
- addr += PGDIR_SIZE;
- }
-out:
- return result;
-}
-
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.pud);
@@ -184,22 +71,62 @@ err:
return result;
}
+static void *alloc_pgt_page(void *data)
+{
+ struct kimage *image = (struct kimage *)data;
+ struct page *page;
+ void *p = NULL;
+
+ page = kimage_alloc_control_pages(image, 0);
+ if (page) {
+ p = page_address(page);
+ clear_page(p);
+ }
+
+ return p;
+}
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
+ struct x86_mapping_info info = {
+ .alloc_pgt_page = alloc_pgt_page,
+ .context = image,
+ .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+ };
+ unsigned long mstart, mend;
pgd_t *level4p;
int result;
+ int i;
+
level4p = (pgd_t *)__va(start_pgtable);
- result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
- if (result)
- return result;
+ clear_page(level4p);
+ for (i = 0; i < nr_pfn_mapped; i++) {
+ mstart = pfn_mapped[i].start << PAGE_SHIFT;
+ mend = pfn_mapped[i].end << PAGE_SHIFT;
+
+ result = kernel_ident_mapping_init(&info,
+ level4p, mstart, mend);
+ if (result)
+ return result;
+ }
+
/*
- * image->start may be outside 0 ~ max_pfn, for example when
- * jump back to original kernel from kexeced kernel
+ * segments's mem ranges could be outside 0 ~ max_pfn,
+ * for example when jump back to original kernel from kexeced kernel.
+ * or first kernel is booted with user mem map, and second kernel
+ * could be loaded out of that range.
*/
- result = init_one_level2_page(image, level4p, image->start);
- if (result)
- return result;
+ for (i = 0; i < image->nr_segments; i++) {
+ mstart = image->segment[i].mem;
+ mend = mstart + image->segment[i].memsz;
+
+ result = kernel_ident_mapping_init(&info,
+ level4p, mstart, mend);
+
+ if (result)
+ return result;
+ }
+
return init_transition_pgtable(image, level4p);
}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 3a04b224d0c..22db92bbdf1 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -364,10 +364,7 @@ static struct attribute_group mc_attr_group = {
static void microcode_fini_cpu(int cpu)
{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
microcode_ops->microcode_fini_cpu(cpu);
- uci->valid = 0;
}
static enum ucode_state microcode_resume_cpu(int cpu)
@@ -383,6 +380,10 @@ static enum ucode_state microcode_resume_cpu(int cpu)
static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
{
enum ucode_state ustate;
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+ if (uci && uci->valid)
+ return UCODE_OK;
if (collect_cpu_info(cpu))
return UCODE_ERROR;
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
new file mode 100644
index 00000000000..577db8417d1
--- /dev/null
+++ b/arch/x86/kernel/microcode_core_early.c
@@ -0,0 +1,76 @@
+/*
+ * X86 CPU microcode early update for Linux
+ *
+ * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ * H Peter Anvin" <hpa@zytor.com>
+ *
+ * This driver allows to early upgrade microcode on Intel processors
+ * belonging to IA-32 family - PentiumPro, Pentium II,
+ * Pentium III, Xeon, Pentium 4, etc.
+ *
+ * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
+ * Software Developer's Manual.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <asm/microcode_intel.h>
+#include <asm/processor.h>
+
+#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
+#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
+#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
+#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
+#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
+#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
+#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
+
+#define CPUID_IS(a, b, c, ebx, ecx, edx) \
+ (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
+
+/*
+ * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
+ * x86_vendor() gets vendor id for BSP.
+ *
+ * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
+ * coding, we still use x86_vendor() to get vendor id for AP.
+ *
+ * x86_vendor() gets vendor information directly through cpuid.
+ */
+static int __cpuinit x86_vendor(void)
+{
+ u32 eax = 0x00000000;
+ u32 ebx, ecx = 0, edx;
+
+ if (!have_cpuid_p())
+ return X86_VENDOR_UNKNOWN;
+
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
+ return X86_VENDOR_INTEL;
+
+ if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
+ return X86_VENDOR_AMD;
+
+ return X86_VENDOR_UNKNOWN;
+}
+
+void __init load_ucode_bsp(void)
+{
+ int vendor = x86_vendor();
+
+ if (vendor == X86_VENDOR_INTEL)
+ load_ucode_intel_bsp();
+}
+
+void __cpuinit load_ucode_ap(void)
+{
+ int vendor = x86_vendor();
+
+ if (vendor == X86_VENDOR_INTEL)
+ load_ucode_intel_ap();
+}
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 3544aed3933..5fb2cebf556 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -79,7 +79,7 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
-#include <asm/microcode.h>
+#include <asm/microcode_intel.h>
#include <asm/processor.h>
#include <asm/msr.h>
@@ -87,59 +87,6 @@ MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
MODULE_LICENSE("GPL");
-struct microcode_header_intel {
- unsigned int hdrver;
- unsigned int rev;
- unsigned int date;
- unsigned int sig;
- unsigned int cksum;
- unsigned int ldrver;
- unsigned int pf;
- unsigned int datasize;
- unsigned int totalsize;
- unsigned int reserved[3];
-};
-
-struct microcode_intel {
- struct microcode_header_intel hdr;
- unsigned int bits[0];
-};
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
- unsigned int sig;
- unsigned int pf;
- unsigned int cksum;
-};
-
-struct extended_sigtable {
- unsigned int count;
- unsigned int cksum;
- unsigned int reserved[3];
- struct extended_signature sigs[0];
-};
-
-#define DEFAULT_UCODE_DATASIZE (2000)
-#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
-#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
-#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
-#define DWSIZE (sizeof(u32))
-
-#define get_totalsize(mc) \
- (((struct microcode_intel *)mc)->hdr.totalsize ? \
- ((struct microcode_intel *)mc)->hdr.totalsize : \
- DEFAULT_UCODE_TOTALSIZE)
-
-#define get_datasize(mc) \
- (((struct microcode_intel *)mc)->hdr.datasize ? \
- ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
-
-#define sigmatch(s1, s2, p1, p2) \
- (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
-
-#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
-
static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu_num);
@@ -162,128 +109,25 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
return 0;
}
-static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
-{
- return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
-}
-
-static inline int
-update_match_revision(struct microcode_header_intel *mc_header, int rev)
-{
- return (mc_header->rev <= rev) ? 0 : 1;
-}
-
-static int microcode_sanity_check(void *mc)
-{
- unsigned long total_size, data_size, ext_table_size;
- struct microcode_header_intel *mc_header = mc;
- struct extended_sigtable *ext_header = NULL;
- int sum, orig_sum, ext_sigcount = 0, i;
- struct extended_signature *ext_sig;
-
- total_size = get_totalsize(mc_header);
- data_size = get_datasize(mc_header);
-
- if (data_size + MC_HEADER_SIZE > total_size) {
- pr_err("error! Bad data size in microcode data file\n");
- return -EINVAL;
- }
-
- if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
- pr_err("error! Unknown microcode update format\n");
- return -EINVAL;
- }
- ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
- if (ext_table_size) {
- if ((ext_table_size < EXT_HEADER_SIZE)
- || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
- pr_err("error! Small exttable size in microcode data file\n");
- return -EINVAL;
- }
- ext_header = mc + MC_HEADER_SIZE + data_size;
- if (ext_table_size != exttable_size(ext_header)) {
- pr_err("error! Bad exttable size in microcode data file\n");
- return -EFAULT;
- }
- ext_sigcount = ext_header->count;
- }
-
- /* check extended table checksum */
- if (ext_table_size) {
- int ext_table_sum = 0;
- int *ext_tablep = (int *)ext_header;
-
- i = ext_table_size / DWSIZE;
- while (i--)
- ext_table_sum += ext_tablep[i];
- if (ext_table_sum) {
- pr_warning("aborting, bad extended signature table checksum\n");
- return -EINVAL;
- }
- }
-
- /* calculate the checksum */
- orig_sum = 0;
- i = (MC_HEADER_SIZE + data_size) / DWSIZE;
- while (i--)
- orig_sum += ((int *)mc)[i];
- if (orig_sum) {
- pr_err("aborting, bad checksum\n");
- return -EINVAL;
- }
- if (!ext_table_size)
- return 0;
- /* check extended signature checksum */
- for (i = 0; i < ext_sigcount; i++) {
- ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
- EXT_SIGNATURE_SIZE * i;
- sum = orig_sum
- - (mc_header->sig + mc_header->pf + mc_header->cksum)
- + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
- if (sum) {
- pr_err("aborting, bad checksum\n");
- return -EINVAL;
- }
- }
- return 0;
-}
-
/*
* return 0 - no update found
* return 1 - found update
*/
-static int
-get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
+static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
{
- struct microcode_header_intel *mc_header = mc;
- struct extended_sigtable *ext_header;
- unsigned long total_size = get_totalsize(mc_header);
- int ext_sigcount, i;
- struct extended_signature *ext_sig;
-
- if (!update_match_revision(mc_header, rev))
- return 0;
-
- if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf))
- return 1;
+ struct cpu_signature cpu_sig;
+ unsigned int csig, cpf, crev;
- /* Look for ext. headers: */
- if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
- return 0;
+ collect_cpu_info(cpu, &cpu_sig);
- ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
- ext_sigcount = ext_header->count;
- ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+ csig = cpu_sig.sig;
+ cpf = cpu_sig.pf;
+ crev = cpu_sig.rev;
- for (i = 0; i < ext_sigcount; i++) {
- if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf))
- return 1;
- ext_sig++;
- }
- return 0;
+ return get_matching_microcode(csig, cpf, mc_intel, crev);
}
-static int apply_microcode(int cpu)
+int apply_microcode(int cpu)
{
struct microcode_intel *mc_intel;
struct ucode_cpu_info *uci;
@@ -300,6 +144,14 @@ static int apply_microcode(int cpu)
if (mc_intel == NULL)
return 0;
+ /*
+ * Microcode on this CPU could be updated earlier. Only apply the
+ * microcode patch in mc_intel when it is newer than the one on this
+ * CPU.
+ */
+ if (get_matching_mc(mc_intel, cpu) == 0)
+ return 0;
+
/* write microcode via MSR 0x79 */
wrmsr(MSR_IA32_UCODE_WRITE,
(unsigned long) mc_intel->bits,
@@ -338,6 +190,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
unsigned int leftover = size;
enum ucode_state state = UCODE_OK;
unsigned int curr_mc_size = 0;
+ unsigned int csig, cpf;
while (leftover) {
struct microcode_header_intel mc_header;
@@ -362,11 +215,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
}
if (get_ucode_data(mc, ucode_ptr, mc_size) ||
- microcode_sanity_check(mc) < 0) {
+ microcode_sanity_check(mc, 1) < 0) {
break;
}
- if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
+ csig = uci->cpu_sig.sig;
+ cpf = uci->cpu_sig.pf;
+ if (get_matching_microcode(csig, cpf, mc, new_rev)) {
vfree(new_mc);
new_rev = mc_header.rev;
new_mc = mc;
@@ -393,6 +248,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
vfree(uci->mc);
uci->mc = (struct microcode_intel *)new_mc;
+ /*
+ * If early loading microcode is supported, save this mc into
+ * permanent memory. So it will be loaded early when a CPU is hot added
+ * or resumes.
+ */
+ save_mc_for_early(new_mc);
+
pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
cpu, new_rev, uci->cpu_sig.rev);
out:
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
new file mode 100644
index 00000000000..7890bc83895
--- /dev/null
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -0,0 +1,796 @@
+/*
+ * Intel CPU microcode early update for Linux
+ *
+ * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ * H Peter Anvin" <hpa@zytor.com>
+ *
+ * This allows to early upgrade microcode on Intel processors
+ * belonging to IA-32 family - PentiumPro, Pentium II,
+ * Pentium III, Xeon, Pentium 4, etc.
+ *
+ * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
+ * Software Developer's Manual.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/earlycpio.h>
+#include <linux/initrd.h>
+#include <linux/cpu.h>
+#include <asm/msr.h>
+#include <asm/microcode_intel.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/setup.h>
+
+unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+struct mc_saved_data {
+ unsigned int mc_saved_count;
+ struct microcode_intel **mc_saved;
+} mc_saved_data;
+
+static enum ucode_state __cpuinit
+generic_load_microcode_early(struct microcode_intel **mc_saved_p,
+ unsigned int mc_saved_count,
+ struct ucode_cpu_info *uci)
+{
+ struct microcode_intel *ucode_ptr, *new_mc = NULL;
+ int new_rev = uci->cpu_sig.rev;
+ enum ucode_state state = UCODE_OK;
+ unsigned int mc_size;
+ struct microcode_header_intel *mc_header;
+ unsigned int csig = uci->cpu_sig.sig;
+ unsigned int cpf = uci->cpu_sig.pf;
+ int i;
+
+ for (i = 0; i < mc_saved_count; i++) {
+ ucode_ptr = mc_saved_p[i];
+
+ mc_header = (struct microcode_header_intel *)ucode_ptr;
+ mc_size = get_totalsize(mc_header);
+ if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) {
+ new_rev = mc_header->rev;
+ new_mc = ucode_ptr;
+ }
+ }
+
+ if (!new_mc) {
+ state = UCODE_NFOUND;
+ goto out;
+ }
+
+ uci->mc = (struct microcode_intel *)new_mc;
+out:
+ return state;
+}
+
+static void __cpuinit
+microcode_pointer(struct microcode_intel **mc_saved,
+ unsigned long *mc_saved_in_initrd,
+ unsigned long initrd_start, int mc_saved_count)
+{
+ int i;
+
+ for (i = 0; i < mc_saved_count; i++)
+ mc_saved[i] = (struct microcode_intel *)
+ (mc_saved_in_initrd[i] + initrd_start);
+}
+
+#ifdef CONFIG_X86_32
+static void __cpuinit
+microcode_phys(struct microcode_intel **mc_saved_tmp,
+ struct mc_saved_data *mc_saved_data)
+{
+ int i;
+ struct microcode_intel ***mc_saved;
+
+ mc_saved = (struct microcode_intel ***)
+ __pa_symbol(&mc_saved_data->mc_saved);
+ for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
+ struct microcode_intel *p;
+
+ p = *(struct microcode_intel **)
+ __pa(mc_saved_data->mc_saved + i);
+ mc_saved_tmp[i] = (struct microcode_intel *)__pa(p);
+ }
+}
+#endif
+
+static enum ucode_state __cpuinit
+load_microcode(struct mc_saved_data *mc_saved_data,
+ unsigned long *mc_saved_in_initrd,
+ unsigned long initrd_start,
+ struct ucode_cpu_info *uci)
+{
+ struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+ unsigned int count = mc_saved_data->mc_saved_count;
+
+ if (!mc_saved_data->mc_saved) {
+ microcode_pointer(mc_saved_tmp, mc_saved_in_initrd,
+ initrd_start, count);
+
+ return generic_load_microcode_early(mc_saved_tmp, count, uci);
+ } else {
+#ifdef CONFIG_X86_32
+ microcode_phys(mc_saved_tmp, mc_saved_data);
+ return generic_load_microcode_early(mc_saved_tmp, count, uci);
+#else
+ return generic_load_microcode_early(mc_saved_data->mc_saved,
+ count, uci);
+#endif
+ }
+}
+
+static u8 get_x86_family(unsigned long sig)
+{
+ u8 x86;
+
+ x86 = (sig >> 8) & 0xf;
+
+ if (x86 == 0xf)
+ x86 += (sig >> 20) & 0xff;
+
+ return x86;
+}
+
+static u8 get_x86_model(unsigned long sig)
+{
+ u8 x86, x86_model;
+
+ x86 = get_x86_family(sig);
+ x86_model = (sig >> 4) & 0xf;
+
+ if (x86 == 0x6 || x86 == 0xf)
+ x86_model += ((sig >> 16) & 0xf) << 4;
+
+ return x86_model;
+}
+
+/*
+ * Given CPU signature and a microcode patch, this function finds if the
+ * microcode patch has matching family and model with the CPU.
+ */
+static enum ucode_state
+matching_model_microcode(struct microcode_header_intel *mc_header,
+ unsigned long sig)
+{
+ u8 x86, x86_model;
+ u8 x86_ucode, x86_model_ucode;
+ struct extended_sigtable *ext_header;
+ unsigned long total_size = get_totalsize(mc_header);
+ unsigned long data_size = get_datasize(mc_header);
+ int ext_sigcount, i;
+ struct extended_signature *ext_sig;
+
+ x86 = get_x86_family(sig);
+ x86_model = get_x86_model(sig);
+
+ x86_ucode = get_x86_family(mc_header->sig);
+ x86_model_ucode = get_x86_model(mc_header->sig);
+
+ if (x86 == x86_ucode && x86_model == x86_model_ucode)
+ return UCODE_OK;
+
+ /* Look for ext. headers: */
+ if (total_size <= data_size + MC_HEADER_SIZE)
+ return UCODE_NFOUND;
+
+ ext_header = (struct extended_sigtable *)
+ mc_header + data_size + MC_HEADER_SIZE;
+ ext_sigcount = ext_header->count;
+ ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
+ for (i = 0; i < ext_sigcount; i++) {
+ x86_ucode = get_x86_family(ext_sig->sig);
+ x86_model_ucode = get_x86_model(ext_sig->sig);
+
+ if (x86 == x86_ucode && x86_model == x86_model_ucode)
+ return UCODE_OK;
+
+ ext_sig++;
+ }
+
+ return UCODE_NFOUND;
+}
+
+static int
+save_microcode(struct mc_saved_data *mc_saved_data,
+ struct microcode_intel **mc_saved_src,
+ unsigned int mc_saved_count)
+{
+ int i, j;
+ struct microcode_intel **mc_saved_p;
+ int ret;
+
+ if (!mc_saved_count)
+ return -EINVAL;
+
+ /*
+ * Copy new microcode data.
+ */
+ mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *),
+ GFP_KERNEL);
+ if (!mc_saved_p)
+ return -ENOMEM;
+
+ for (i = 0; i < mc_saved_count; i++) {
+ struct microcode_intel *mc = mc_saved_src[i];
+ struct microcode_header_intel *mc_header = &mc->hdr;
+ unsigned long mc_size = get_totalsize(mc_header);
+ mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL);
+ if (!mc_saved_p[i]) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ if (!mc_saved_src[i]) {
+ ret = -EINVAL;
+ goto err;
+ }
+ memcpy(mc_saved_p[i], mc, mc_size);
+ }
+
+ /*
+ * Point to newly saved microcode.
+ */
+ mc_saved_data->mc_saved = mc_saved_p;
+ mc_saved_data->mc_saved_count = mc_saved_count;
+
+ return 0;
+
+err:
+ for (j = 0; j <= i; j++)
+ kfree(mc_saved_p[j]);
+ kfree(mc_saved_p);
+
+ return ret;
+}
+
+/*
+ * A microcode patch in ucode_ptr is saved into mc_saved
+ * - if it has matching signature and newer revision compared to an existing
+ * patch mc_saved.
+ * - or if it is a newly discovered microcode patch.
+ *
+ * The microcode patch should have matching model with CPU.
+ */
+static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr,
+ unsigned int *mc_saved_count_p)
+{
+ int i;
+ int found = 0;
+ unsigned int mc_saved_count = *mc_saved_count_p;
+ struct microcode_header_intel *mc_header;
+
+ mc_header = (struct microcode_header_intel *)ucode_ptr;
+ for (i = 0; i < mc_saved_count; i++) {
+ unsigned int sig, pf;
+ unsigned int new_rev;
+ struct microcode_header_intel *mc_saved_header =
+ (struct microcode_header_intel *)mc_saved[i];
+ sig = mc_saved_header->sig;
+ pf = mc_saved_header->pf;
+ new_rev = mc_header->rev;
+
+ if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) {
+ found = 1;
+ if (update_match_revision(mc_header, new_rev)) {
+ /*
+ * Found an older ucode saved before.
+ * Replace the older one with this newer
+ * one.
+ */
+ mc_saved[i] =
+ (struct microcode_intel *)ucode_ptr;
+ break;
+ }
+ }
+ }
+ if (i >= mc_saved_count && !found)
+ /*
+ * This ucode is first time discovered in ucode file.
+ * Save it to memory.
+ */
+ mc_saved[mc_saved_count++] =
+ (struct microcode_intel *)ucode_ptr;
+
+ *mc_saved_count_p = mc_saved_count;
+}
+
+/*
+ * Get microcode matching with BSP's model. Only CPUs with the same model as
+ * BSP can stay in the platform.
+ */
+static enum ucode_state __init
+get_matching_model_microcode(int cpu, unsigned long start,
+ void *data, size_t size,
+ struct mc_saved_data *mc_saved_data,
+ unsigned long *mc_saved_in_initrd,
+ struct ucode_cpu_info *uci)
+{
+ u8 *ucode_ptr = data;
+ unsigned int leftover = size;
+ enum ucode_state state = UCODE_OK;
+ unsigned int mc_size;
+ struct microcode_header_intel *mc_header;
+ struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+ unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
+ int i;
+
+ while (leftover) {
+ mc_header = (struct microcode_header_intel *)ucode_ptr;
+
+ mc_size = get_totalsize(mc_header);
+ if (!mc_size || mc_size > leftover ||
+ microcode_sanity_check(ucode_ptr, 0) < 0)
+ break;
+
+ leftover -= mc_size;
+
+ /*
+ * Since APs with same family and model as the BSP may boot in
+ * the platform, we need to find and save microcode patches
+ * with the same family and model as the BSP.
+ */
+ if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
+ UCODE_OK) {
+ ucode_ptr += mc_size;
+ continue;
+ }
+
+ _save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count);
+
+ ucode_ptr += mc_size;
+ }
+
+ if (leftover) {
+ state = UCODE_ERROR;
+ goto out;
+ }
+
+ if (mc_saved_count == 0) {
+ state = UCODE_NFOUND;
+ goto out;
+ }
+
+ for (i = 0; i < mc_saved_count; i++)
+ mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
+
+ mc_saved_data->mc_saved_count = mc_saved_count;
+out:
+ return state;
+}
+
+#define native_rdmsr(msr, val1, val2) \
+do { \
+ u64 __val = native_read_msr((msr)); \
+ (void)((val1) = (u32)__val); \
+ (void)((val2) = (u32)(__val >> 32)); \
+} while (0)
+
+#define native_wrmsr(msr, low, high) \
+ native_write_msr(msr, low, high);
+
+static int __cpuinit collect_cpu_info_early(struct ucode_cpu_info *uci)
+{
+ unsigned int val[2];
+ u8 x86, x86_model;
+ struct cpu_signature csig;
+ unsigned int eax, ebx, ecx, edx;
+
+ csig.sig = 0;
+ csig.pf = 0;
+ csig.rev = 0;
+
+ memset(uci, 0, sizeof(*uci));
+
+ eax = 0x00000001;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ csig.sig = eax;
+
+ x86 = get_x86_family(csig.sig);
+ x86_model = get_x86_model(csig.sig);
+
+ if ((x86_model >= 5) || (x86 > 6)) {
+ /* get processor flags from MSR 0x17 */
+ native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+ csig.pf = 1 << ((val[1] >> 18) & 7);
+ }
+ native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+ /* As documented in the SDM: Do a CPUID 1 here */
+ sync_core();
+
+ /* get the current revision from MSR 0x8B */
+ native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+
+ csig.rev = val[1];
+
+ uci->cpu_sig = csig;
+ uci->valid = 1;
+
+ return 0;
+}
+
+#ifdef DEBUG
+static void __ref show_saved_mc(void)
+{
+ int i, j;
+ unsigned int sig, pf, rev, total_size, data_size, date;
+ struct ucode_cpu_info uci;
+
+ if (mc_saved_data.mc_saved_count == 0) {
+ pr_debug("no micorcode data saved.\n");
+ return;
+ }
+ pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
+
+ collect_cpu_info_early(&uci);
+
+ sig = uci.cpu_sig.sig;
+ pf = uci.cpu_sig.pf;
+ rev = uci.cpu_sig.rev;
+ pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n",
+ smp_processor_id(), sig, pf, rev);
+
+ for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
+ struct microcode_header_intel *mc_saved_header;
+ struct extended_sigtable *ext_header;
+ int ext_sigcount;
+ struct extended_signature *ext_sig;
+
+ mc_saved_header = (struct microcode_header_intel *)
+ mc_saved_data.mc_saved[i];
+ sig = mc_saved_header->sig;
+ pf = mc_saved_header->pf;
+ rev = mc_saved_header->rev;
+ total_size = get_totalsize(mc_saved_header);
+ data_size = get_datasize(mc_saved_header);
+ date = mc_saved_header->date;
+
+ pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
+ i, sig, pf, rev, total_size,
+ date & 0xffff,
+ date >> 24,
+ (date >> 16) & 0xff);
+
+ /* Look for ext. headers: */
+ if (total_size <= data_size + MC_HEADER_SIZE)
+ continue;
+
+ ext_header = (struct extended_sigtable *)
+ mc_saved_header + data_size + MC_HEADER_SIZE;
+ ext_sigcount = ext_header->count;
+ ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
+ for (j = 0; j < ext_sigcount; j++) {
+ sig = ext_sig->sig;
+ pf = ext_sig->pf;
+
+ pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
+ j, sig, pf);
+
+ ext_sig++;
+ }
+
+ }
+}
+#else
+static inline void show_saved_mc(void)
+{
+}
+#endif
+
+#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
+/*
+ * Save this mc into mc_saved_data. So it will be loaded early when a CPU is
+ * hot added or resumes.
+ *
+ * Please make sure this mc should be a valid microcode patch before calling
+ * this function.
+ */
+int save_mc_for_early(u8 *mc)
+{
+ struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+ unsigned int mc_saved_count_init;
+ unsigned int mc_saved_count;
+ struct microcode_intel **mc_saved;
+ int ret = 0;
+ int i;
+
+ /*
+ * Hold hotplug lock so mc_saved_data is not accessed by a CPU in
+ * hotplug.
+ */
+ cpu_hotplug_driver_lock();
+
+ mc_saved_count_init = mc_saved_data.mc_saved_count;
+ mc_saved_count = mc_saved_data.mc_saved_count;
+ mc_saved = mc_saved_data.mc_saved;
+
+ if (mc_saved && mc_saved_count)
+ memcpy(mc_saved_tmp, mc_saved,
+ mc_saved_count * sizeof(struct mirocode_intel *));
+ /*
+ * Save the microcode patch mc in mc_save_tmp structure if it's a newer
+ * version.
+ */
+
+ _save_mc(mc_saved_tmp, mc, &mc_saved_count);
+
+ /*
+ * Save the mc_save_tmp in global mc_saved_data.
+ */
+ ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
+ if (ret) {
+ pr_err("Can not save microcode patch.\n");
+ goto out;
+ }
+
+ show_saved_mc();
+
+ /*
+ * Free old saved microcod data.
+ */
+ if (mc_saved) {
+ for (i = 0; i < mc_saved_count_init; i++)
+ kfree(mc_saved[i]);
+ kfree(mc_saved);
+ }
+
+out:
+ cpu_hotplug_driver_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(save_mc_for_early);
+#endif
+
+static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
+static __init enum ucode_state
+scan_microcode(unsigned long start, unsigned long end,
+ struct mc_saved_data *mc_saved_data,
+ unsigned long *mc_saved_in_initrd,
+ struct ucode_cpu_info *uci)
+{
+ unsigned int size = end - start + 1;
+ struct cpio_data cd;
+ long offset = 0;
+#ifdef CONFIG_X86_32
+ char *p = (char *)__pa_symbol(ucode_name);
+#else
+ char *p = ucode_name;
+#endif
+
+ cd.data = NULL;
+ cd.size = 0;
+
+ cd = find_cpio_data(p, (void *)start, size, &offset);
+ if (!cd.data)
+ return UCODE_ERROR;
+
+
+ return get_matching_model_microcode(0, start, cd.data, cd.size,
+ mc_saved_data, mc_saved_in_initrd,
+ uci);
+}
+
+/*
+ * Print ucode update info.
+ */
+static void __cpuinit
+print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
+{
+ int cpu = smp_processor_id();
+
+ pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
+ cpu,
+ uci->cpu_sig.rev,
+ date & 0xffff,
+ date >> 24,
+ (date >> 16) & 0xff);
+}
+
+#ifdef CONFIG_X86_32
+
+static int delay_ucode_info;
+static int current_mc_date;
+
+/*
+ * Print early updated ucode info after printk works. This is delayed info dump.
+ */
+void __cpuinit show_ucode_info_early(void)
+{
+ struct ucode_cpu_info uci;
+
+ if (delay_ucode_info) {
+ collect_cpu_info_early(&uci);
+ print_ucode_info(&uci, current_mc_date);
+ delay_ucode_info = 0;
+ }
+}
+
+/*
+ * At this point, we can not call printk() yet. Keep microcode patch number in
+ * mc_saved_data.mc_saved and delay printing microcode info in
+ * show_ucode_info_early() until printk() works.
+ */
+static void __cpuinit print_ucode(struct ucode_cpu_info *uci)
+{
+ struct microcode_intel *mc_intel;
+ int *delay_ucode_info_p;
+ int *current_mc_date_p;
+
+ mc_intel = uci->mc;
+ if (mc_intel == NULL)
+ return;
+
+ delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info);
+ current_mc_date_p = (int *)__pa_symbol(&current_mc_date);
+
+ *delay_ucode_info_p = 1;
+ *current_mc_date_p = mc_intel->hdr.date;
+}
+#else
+
+/*
+ * Flush global tlb. We only do this in x86_64 where paging has been enabled
+ * already and PGE should be enabled as well.
+ */
+static inline void __cpuinit flush_tlb_early(void)
+{
+ __native_flush_tlb_global_irq_disabled();
+}
+
+static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci)
+{
+ struct microcode_intel *mc_intel;
+
+ mc_intel = uci->mc;
+ if (mc_intel == NULL)
+ return;
+
+ print_ucode_info(uci, mc_intel->hdr.date);
+}
+#endif
+
+static int apply_microcode_early(struct mc_saved_data *mc_saved_data,
+ struct ucode_cpu_info *uci)
+{
+ struct microcode_intel *mc_intel;
+ unsigned int val[2];
+
+ mc_intel = uci->mc;
+ if (mc_intel == NULL)
+ return 0;
+
+ /* write microcode via MSR 0x79 */
+ native_wrmsr(MSR_IA32_UCODE_WRITE,
+ (unsigned long) mc_intel->bits,
+ (unsigned long) mc_intel->bits >> 16 >> 16);
+ native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+ /* As documented in the SDM: Do a CPUID 1 here */
+ sync_core();
+
+ /* get the current revision from MSR 0x8B */
+ native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+ if (val[1] != mc_intel->hdr.rev)
+ return -1;
+
+#ifdef CONFIG_X86_64
+ /* Flush global tlb. This is precaution. */
+ flush_tlb_early();
+#endif
+ uci->cpu_sig.rev = val[1];
+
+ print_ucode(uci);
+
+ return 0;
+}
+
+/*
+ * This function converts microcode patch offsets previously stored in
+ * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
+ */
+int __init save_microcode_in_initrd(void)
+{
+ unsigned int count = mc_saved_data.mc_saved_count;
+ struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
+ int ret = 0;
+
+ if (count == 0)
+ return ret;
+
+ microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
+ ret = save_microcode(&mc_saved_data, mc_saved, count);
+ if (ret)
+ pr_err("Can not save microcod patches from initrd");
+
+ show_saved_mc();
+
+ return ret;
+}
+
+static void __init
+_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
+ unsigned long *mc_saved_in_initrd,
+ unsigned long initrd_start_early,
+ unsigned long initrd_end_early,
+ struct ucode_cpu_info *uci)
+{
+ collect_cpu_info_early(uci);
+ scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data,
+ mc_saved_in_initrd, uci);
+ load_microcode(mc_saved_data, mc_saved_in_initrd,
+ initrd_start_early, uci);
+ apply_microcode_early(mc_saved_data, uci);
+}
+
+void __init
+load_ucode_intel_bsp(void)
+{
+ u64 ramdisk_image, ramdisk_size;
+ unsigned long initrd_start_early, initrd_end_early;
+ struct ucode_cpu_info uci;
+#ifdef CONFIG_X86_32
+ struct boot_params *boot_params_p;
+
+ boot_params_p = (struct boot_params *)__pa_symbol(&boot_params);
+ ramdisk_image = boot_params_p->hdr.ramdisk_image;
+ ramdisk_size = boot_params_p->hdr.ramdisk_size;
+ initrd_start_early = ramdisk_image;
+ initrd_end_early = initrd_start_early + ramdisk_size;
+
+ _load_ucode_intel_bsp(
+ (struct mc_saved_data *)__pa_symbol(&mc_saved_data),
+ (unsigned long *)__pa_symbol(&mc_saved_in_initrd),
+ initrd_start_early, initrd_end_early, &uci);
+#else
+ ramdisk_image = boot_params.hdr.ramdisk_image;
+ ramdisk_size = boot_params.hdr.ramdisk_size;
+ initrd_start_early = ramdisk_image + PAGE_OFFSET;
+ initrd_end_early = initrd_start_early + ramdisk_size;
+
+ _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd,
+ initrd_start_early, initrd_end_early, &uci);
+#endif
+}
+
+void __cpuinit load_ucode_intel_ap(void)
+{
+ struct mc_saved_data *mc_saved_data_p;
+ struct ucode_cpu_info uci;
+ unsigned long *mc_saved_in_initrd_p;
+ unsigned long initrd_start_addr;
+#ifdef CONFIG_X86_32
+ unsigned long *initrd_start_p;
+
+ mc_saved_in_initrd_p =
+ (unsigned long *)__pa_symbol(mc_saved_in_initrd);
+ mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data);
+ initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start);
+ initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p);
+#else
+ mc_saved_data_p = &mc_saved_data;
+ mc_saved_in_initrd_p = mc_saved_in_initrd;
+ initrd_start_addr = initrd_start;
+#endif
+
+ /*
+ * If there is no valid ucode previously saved in memory, no need to
+ * update ucode on this AP.
+ */
+ if (mc_saved_data_p->mc_saved_count == 0)
+ return;
+
+ collect_cpu_info_early(&uci);
+ load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
+ initrd_start_addr, &uci);
+ apply_microcode_early(mc_saved_data_p, &uci);
+}
diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/microcode_intel_lib.c
new file mode 100644
index 00000000000..ce69320d017
--- /dev/null
+++ b/arch/x86/kernel/microcode_intel_lib.c
@@ -0,0 +1,174 @@
+/*
+ * Intel CPU Microcode Update Driver for Linux
+ *
+ * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ * H Peter Anvin" <hpa@zytor.com>
+ *
+ * This driver allows to upgrade microcode on Intel processors
+ * belonging to IA-32 family - PentiumPro, Pentium II,
+ * Pentium III, Xeon, Pentium 4, etc.
+ *
+ * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
+ * Software Developer's Manual
+ * Order Number 253668 or free download from:
+ *
+ * http://developer.intel.com/Assets/PDF/manual/253668.pdf
+ *
+ * For more information, go to http://www.urbanmyth.org/microcode
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/microcode_intel.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+
+static inline int
+update_match_cpu(unsigned int csig, unsigned int cpf,
+ unsigned int sig, unsigned int pf)
+{
+ return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1;
+}
+
+int
+update_match_revision(struct microcode_header_intel *mc_header, int rev)
+{
+ return (mc_header->rev <= rev) ? 0 : 1;
+}
+
+int microcode_sanity_check(void *mc, int print_err)
+{
+ unsigned long total_size, data_size, ext_table_size;
+ struct microcode_header_intel *mc_header = mc;
+ struct extended_sigtable *ext_header = NULL;
+ int sum, orig_sum, ext_sigcount = 0, i;
+ struct extended_signature *ext_sig;
+
+ total_size = get_totalsize(mc_header);
+ data_size = get_datasize(mc_header);
+
+ if (data_size + MC_HEADER_SIZE > total_size) {
+ if (print_err)
+ pr_err("error! Bad data size in microcode data file\n");
+ return -EINVAL;
+ }
+
+ if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
+ if (print_err)
+ pr_err("error! Unknown microcode update format\n");
+ return -EINVAL;
+ }
+ ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
+ if (ext_table_size) {
+ if ((ext_table_size < EXT_HEADER_SIZE)
+ || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
+ if (print_err)
+ pr_err("error! Small exttable size in microcode data file\n");
+ return -EINVAL;
+ }
+ ext_header = mc + MC_HEADER_SIZE + data_size;
+ if (ext_table_size != exttable_size(ext_header)) {
+ if (print_err)
+ pr_err("error! Bad exttable size in microcode data file\n");
+ return -EFAULT;
+ }
+ ext_sigcount = ext_header->count;
+ }
+
+ /* check extended table checksum */
+ if (ext_table_size) {
+ int ext_table_sum = 0;
+ int *ext_tablep = (int *)ext_header;
+
+ i = ext_table_size / DWSIZE;
+ while (i--)
+ ext_table_sum += ext_tablep[i];
+ if (ext_table_sum) {
+ if (print_err)
+ pr_warn("aborting, bad extended signature table checksum\n");
+ return -EINVAL;
+ }
+ }
+
+ /* calculate the checksum */
+ orig_sum = 0;
+ i = (MC_HEADER_SIZE + data_size) / DWSIZE;
+ while (i--)
+ orig_sum += ((int *)mc)[i];
+ if (orig_sum) {
+ if (print_err)
+ pr_err("aborting, bad checksum\n");
+ return -EINVAL;
+ }
+ if (!ext_table_size)
+ return 0;
+ /* check extended signature checksum */
+ for (i = 0; i < ext_sigcount; i++) {
+ ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
+ EXT_SIGNATURE_SIZE * i;
+ sum = orig_sum
+ - (mc_header->sig + mc_header->pf + mc_header->cksum)
+ + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
+ if (sum) {
+ if (print_err)
+ pr_err("aborting, bad checksum\n");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(microcode_sanity_check);
+
+/*
+ * return 0 - no update found
+ * return 1 - found update
+ */
+int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
+{
+ struct microcode_header_intel *mc_header = mc;
+ struct extended_sigtable *ext_header;
+ unsigned long total_size = get_totalsize(mc_header);
+ int ext_sigcount, i;
+ struct extended_signature *ext_sig;
+
+ if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf))
+ return 1;
+
+ /* Look for ext. headers: */
+ if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
+ return 0;
+
+ ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
+ ext_sigcount = ext_header->count;
+ ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
+ for (i = 0; i < ext_sigcount; i++) {
+ if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf))
+ return 1;
+ ext_sig++;
+ }
+ return 0;
+}
+
+/*
+ * return 0 - no update found
+ * return 1 - found update
+ */
+int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev)
+{
+ struct microcode_header_intel *mc_header = mc;
+
+ if (!update_match_revision(mc_header, rev))
+ return 0;
+
+ return get_matching_sig(csig, cpf, mc, rev);
+}
+EXPORT_SYMBOL_GPL(get_matching_microcode);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index a7c5661f849..4929502c137 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -174,6 +174,9 @@ static int msr_open(struct inode *inode, struct file *file)
unsigned int cpu;
struct cpuinfo_x86 *c;
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
cpu = iminor(file->f_path.dentry->d_inode);
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO; /* No such CPU */
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index de2b7ad7027..872079a67e4 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -56,7 +56,7 @@ struct device x86_dma_fallback_dev = {
EXPORT_SYMBOL(x86_dma_fallback_dev);
/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES 32768
+#define PREALLOC_DMA_DEBUG_ENTRIES 65536
int dma_set_mask(struct device *dev, u64 mask)
{
@@ -265,7 +265,7 @@ rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
-static __devinit void via_no_dac(struct pci_dev *dev)
+static void via_no_dac(struct pci_dev *dev)
{
if (forbid_dac == 0) {
dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 2ed787f15bf..14ae10031ff 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -268,13 +268,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-#ifdef CONFIG_APM_MODULE
-EXPORT_SYMBOL(pm_idle);
-#endif
+static void (*x86_idle)(void);
#ifndef CONFIG_SMP
static inline void play_dead(void)
@@ -351,7 +345,7 @@ void cpu_idle(void)
rcu_idle_enter();
if (cpuidle_idle_call())
- pm_idle();
+ x86_idle();
rcu_idle_exit();
start_critical_timings();
@@ -375,7 +369,6 @@ void cpu_idle(void)
*/
void default_idle(void)
{
- trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
trace_cpu_idle_rcuidle(1, smp_processor_id());
current_thread_info()->status &= ~TS_POLLING;
/*
@@ -389,21 +382,22 @@ void default_idle(void)
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
- trace_power_end_rcuidle(smp_processor_id());
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
#endif
-bool set_pm_idle_to_default(void)
+#ifdef CONFIG_XEN
+bool xen_set_default_idle(void)
{
- bool ret = !!pm_idle;
+ bool ret = !!x86_idle;
- pm_idle = default_idle;
+ x86_idle = default_idle;
return ret;
}
+#endif
void stop_this_cpu(void *dummy)
{
local_irq_disable();
@@ -413,31 +407,8 @@ void stop_this_cpu(void *dummy)
set_cpu_online(smp_processor_id(), false);
disable_local_APIC();
- for (;;) {
- if (hlt_works(smp_processor_id()))
- halt();
- }
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
- if (!need_resched()) {
- trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
- trace_cpu_idle_rcuidle(1, smp_processor_id());
- if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
- clflush((void *)&current_thread_info()->flags);
-
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __sti_mwait(0, 0);
- else
- local_irq_enable();
- trace_power_end_rcuidle(smp_processor_id());
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
- } else
- local_irq_enable();
+ for (;;)
+ halt();
}
/*
@@ -447,62 +418,13 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
- trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id());
trace_cpu_idle_rcuidle(0, smp_processor_id());
local_irq_enable();
while (!need_resched())
cpu_relax();
- trace_power_end_rcuidle(smp_processor_id());
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
-/*
- * mwait selection logic:
- *
- * It depends on the CPU. For AMD CPUs that support MWAIT this is
- * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
- * then depend on a clock divisor and current Pstate of the core. If
- * all cores of a processor are in halt state (C1) the processor can
- * enter the C1E (C1 enhanced) state. If mwait is used this will never
- * happen.
- *
- * idle=mwait overrides this decision and forces the usage of mwait.
- */
-
-#define MWAIT_INFO 0x05
-#define MWAIT_ECX_EXTENDED_INFO 0x01
-#define MWAIT_EDX_C1 0xf0
-
-int mwait_usable(const struct cpuinfo_x86 *c)
-{
- u32 eax, ebx, ecx, edx;
-
- /* Use mwait if idle=mwait boot option is given */
- if (boot_option_idle_override == IDLE_FORCE_MWAIT)
- return 1;
-
- /*
- * Any idle= boot option other than idle=mwait means that we must not
- * use mwait. Eg: idle=halt or idle=poll or idle=nomwait
- */
- if (boot_option_idle_override != IDLE_NO_OVERRIDE)
- return 0;
-
- if (c->cpuid_level < MWAIT_INFO)
- return 0;
-
- cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
- /* Check, whether EDX has extended info about MWAIT */
- if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
- return 1;
-
- /*
- * edx enumeratios MONITOR/MWAIT extensions. Check, whether
- * C1 supports MWAIT
- */
- return (edx & MWAIT_EDX_C1);
-}
-
bool amd_e400_c1e_detected;
EXPORT_SYMBOL(amd_e400_c1e_detected);
@@ -567,31 +489,24 @@ static void amd_e400_idle(void)
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
- if (pm_idle == poll_idle && smp_num_siblings > 1) {
+ if (x86_idle == poll_idle && smp_num_siblings > 1)
pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
- }
#endif
- if (pm_idle)
+ if (x86_idle)
return;
- if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
- /*
- * One CPU supports mwait => All CPUs supports mwait
- */
- pr_info("using mwait in idle threads\n");
- pm_idle = mwait_idle;
- } else if (cpu_has_amd_erratum(amd_erratum_400)) {
+ if (cpu_has_amd_erratum(amd_erratum_400)) {
/* E400: APIC timer interrupt does not wake up CPU from C1e */
pr_info("using AMD E400 aware idle routine\n");
- pm_idle = amd_e400_idle;
+ x86_idle = amd_e400_idle;
} else
- pm_idle = default_idle;
+ x86_idle = default_idle;
}
void __init init_amd_e400_c1e_mask(void)
{
/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
- if (pm_idle == amd_e400_idle)
+ if (x86_idle == amd_e400_idle)
zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
}
@@ -602,11 +517,8 @@ static int __init idle_setup(char *str)
if (!strcmp(str, "poll")) {
pr_info("using polling idle threads\n");
- pm_idle = poll_idle;
+ x86_idle = poll_idle;
boot_option_idle_override = IDLE_POLL;
- } else if (!strcmp(str, "mwait")) {
- boot_option_idle_override = IDLE_FORCE_MWAIT;
- WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
} else if (!strcmp(str, "halt")) {
/*
* When the boot option of idle=halt is added, halt is
@@ -615,7 +527,7 @@ static int __init idle_setup(char *str)
* To continue to load the CPU idle driver, don't touch
* the boot_option_idle_override.
*/
- pm_idle = default_idle;
+ x86_idle = default_idle;
boot_option_idle_override = IDLE_HALT;
} else if (!strcmp(str, "nomwait")) {
/*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6e68a619496..0f49677da51 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,7 +117,7 @@ void release_thread(struct task_struct *dead_task)
{
if (dead_task->mm) {
if (dead_task->mm->context.size) {
- pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n",
+ pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm,
dead_task->mm->context.ldt,
dead_task->mm->context.size);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b629bbe0d9b..29a8120e6fe 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,7 +22,7 @@
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/rcupdate.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/context_tracking.h>
#include <asm/uaccess.h>
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 1b27de56356..26ee48a33dc 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -8,7 +8,7 @@
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
-static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
+static void quirk_intel_irqbalance(struct pci_dev *dev)
{
u8 config;
u16 word;
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
/* Set correct numa_node information for AMD NB functions */
-static void __devinit quirk_amd_nb_node(struct pci_dev *dev)
+static void quirk_amd_nb_node(struct pci_dev *dev)
{
struct pci_dev *nb_ht;
unsigned int devfn;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4e8ba39eaf0..76fa1e9a2b3 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -584,7 +584,7 @@ static void native_machine_emergency_restart(void)
break;
case BOOT_EFI:
- if (efi_enabled)
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
efi.reset_system(reboot_mode ?
EFI_RESET_WARM :
EFI_RESET_COLD,
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 801602b5d74..2e8f3d3b564 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -149,7 +149,6 @@ unsigned long mach_get_cmos_time(void)
if (century) {
century = bcd2bin(century);
year += century * 100;
- printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
} else
year += CMOS_YEARS_OFFS;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 23ddd558fbd..9c857f05cef 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -108,17 +108,16 @@
#include <asm/topology.h>
#include <asm/apicdef.h>
#include <asm/amd_nb.h>
-#ifdef CONFIG_X86_64
-#include <asm/numa_64.h>
-#endif
#include <asm/mce.h>
#include <asm/alternative.h>
#include <asm/prom.h>
/*
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
+ * max_low_pfn_mapped: highest direct mapped pfn under 4GB
+ * max_pfn_mapped: highest direct mapped pfn over 4GB
+ *
+ * The direct mapping only covers E820_RAM regions, so the ranges and gaps are
+ * represented by pfn_mapped
*/
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;
@@ -276,18 +275,7 @@ void * __init extend_brk(size_t size, size_t align)
return ret;
}
-#ifdef CONFIG_X86_64
-static void __init init_gbpages(void)
-{
- if (direct_gbpages && cpu_has_gbpages)
- printk(KERN_INFO "Using GB pages for direct mapping\n");
- else
- direct_gbpages = 0;
-}
-#else
-static inline void init_gbpages(void)
-{
-}
+#ifdef CONFIG_X86_32
static void __init cleanup_highmap(void)
{
}
@@ -296,8 +284,8 @@ static void __init cleanup_highmap(void)
static void __init reserve_brk(void)
{
if (_brk_end > _brk_start)
- memblock_reserve(__pa(_brk_start),
- __pa(_brk_end) - __pa(_brk_start));
+ memblock_reserve(__pa_symbol(_brk_start),
+ _brk_end - _brk_start);
/* Mark brk area as locked down and no longer taking any
new allocations */
@@ -306,27 +294,43 @@ static void __init reserve_brk(void)
#ifdef CONFIG_BLK_DEV_INITRD
+static u64 __init get_ramdisk_image(void)
+{
+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+
+ ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32;
+
+ return ramdisk_image;
+}
+static u64 __init get_ramdisk_size(void)
+{
+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+
+ ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32;
+
+ return ramdisk_size;
+}
+
#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
static void __init relocate_initrd(void)
{
/* Assume only end is not page aligned */
- u64 ramdisk_image = boot_params.hdr.ramdisk_image;
- u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 ramdisk_image = get_ramdisk_image();
+ u64 ramdisk_size = get_ramdisk_size();
u64 area_size = PAGE_ALIGN(ramdisk_size);
- u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
u64 ramdisk_here;
unsigned long slop, clen, mapaddr;
char *p, *q;
- /* We need to move the initrd down into lowmem */
- ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
- PAGE_SIZE);
+ /* We need to move the initrd down into directly mapped mem */
+ ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
+ area_size, PAGE_SIZE);
if (!ramdisk_here)
panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size);
- /* Note: this includes all the lowmem currently occupied by
+ /* Note: this includes all the mem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
memblock_reserve(ramdisk_here, area_size);
initrd_start = ramdisk_here + PAGE_OFFSET;
@@ -336,17 +340,7 @@ static void __init relocate_initrd(void)
q = (char *)initrd_start;
- /* Copy any lowmem portion of the initrd */
- if (ramdisk_image < end_of_lowmem) {
- clen = end_of_lowmem - ramdisk_image;
- p = (char *)__va(ramdisk_image);
- memcpy(q, p, clen);
- q += clen;
- ramdisk_image += clen;
- ramdisk_size -= clen;
- }
-
- /* Copy the highmem portion of the initrd */
+ /* Copy the initrd */
while (ramdisk_size) {
slop = ramdisk_image & ~PAGE_MASK;
clen = ramdisk_size;
@@ -360,22 +354,35 @@ static void __init relocate_initrd(void)
ramdisk_image += clen;
ramdisk_size -= clen;
}
- /* high pages is not converted by early_res_to_bootmem */
- ramdisk_image = boot_params.hdr.ramdisk_image;
- ramdisk_size = boot_params.hdr.ramdisk_size;
+
+ ramdisk_image = get_ramdisk_image();
+ ramdisk_size = get_ramdisk_size();
printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
" [mem %#010llx-%#010llx]\n",
ramdisk_image, ramdisk_image + ramdisk_size - 1,
ramdisk_here, ramdisk_here + ramdisk_size - 1);
}
+static void __init early_reserve_initrd(void)
+{
+ /* Assume only end is not page aligned */
+ u64 ramdisk_image = get_ramdisk_image();
+ u64 ramdisk_size = get_ramdisk_size();
+ u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
+
+ if (!boot_params.hdr.type_of_loader ||
+ !ramdisk_image || !ramdisk_size)
+ return; /* No initrd provided by bootloader */
+
+ memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
+}
static void __init reserve_initrd(void)
{
/* Assume only end is not page aligned */
- u64 ramdisk_image = boot_params.hdr.ramdisk_image;
- u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 ramdisk_image = get_ramdisk_image();
+ u64 ramdisk_size = get_ramdisk_size();
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
- u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
+ u64 mapped_size;
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
@@ -383,22 +390,18 @@ static void __init reserve_initrd(void)
initrd_start = 0;
- if (ramdisk_size >= (end_of_lowmem>>1)) {
+ mapped_size = memblock_mem_size(max_pfn_mapped);
+ if (ramdisk_size >= (mapped_size>>1))
panic("initrd too large to handle, "
"disabling initrd (%lld needed, %lld available)\n",
- ramdisk_size, end_of_lowmem>>1);
- }
+ ramdisk_size, mapped_size>>1);
printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
ramdisk_end - 1);
-
- if (ramdisk_end <= end_of_lowmem) {
- /* All in lowmem, easy case */
- /*
- * don't need to reserve again, already reserved early
- * in i386_start_kernel
- */
+ if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image),
+ PFN_DOWN(ramdisk_end))) {
+ /* All are mapped, easy case */
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
return;
@@ -409,6 +412,9 @@ static void __init reserve_initrd(void)
memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
}
#else
+static void __init early_reserve_initrd(void)
+{
+}
static void __init reserve_initrd(void)
{
}
@@ -419,8 +425,6 @@ static void __init parse_setup_data(void)
struct setup_data *data;
u64 pa_data;
- if (boot_params.hdr.version < 0x0209)
- return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
u32 data_len, map_len;
@@ -456,8 +460,6 @@ static void __init e820_reserve_setup_data(void)
u64 pa_data;
int found = 0;
- if (boot_params.hdr.version < 0x0209)
- return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
@@ -481,8 +483,6 @@ static void __init memblock_x86_reserve_range_setup_data(void)
struct setup_data *data;
u64 pa_data;
- if (boot_params.hdr.version < 0x0209)
- return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
@@ -501,17 +501,51 @@ static void __init memblock_x86_reserve_range_setup_data(void)
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
- * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
- * limit once kexec-tools are fixed.
*/
#ifdef CONFIG_X86_32
# define CRASH_KERNEL_ADDR_MAX (512 << 20)
#else
-# define CRASH_KERNEL_ADDR_MAX (896 << 20)
+# define CRASH_KERNEL_ADDR_MAX MAXMEM
#endif
+static void __init reserve_crashkernel_low(void)
+{
+#ifdef CONFIG_X86_64
+ const unsigned long long alignment = 16<<20; /* 16M */
+ unsigned long long low_base = 0, low_size = 0;
+ unsigned long total_low_mem;
+ unsigned long long base;
+ int ret;
+
+ total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
+ ret = parse_crashkernel_low(boot_command_line, total_low_mem,
+ &low_size, &base);
+ if (ret != 0 || low_size <= 0)
+ return;
+
+ low_base = memblock_find_in_range(low_size, (1ULL<<32),
+ low_size, alignment);
+
+ if (!low_base) {
+ pr_info("crashkernel low reservation failed - No suitable area found.\n");
+
+ return;
+ }
+
+ memblock_reserve(low_base, low_size);
+ pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
+ (unsigned long)(low_size >> 20),
+ (unsigned long)(low_base >> 20),
+ (unsigned long)(total_low_mem >> 20));
+ crashk_low_res.start = low_base;
+ crashk_low_res.end = low_base + low_size - 1;
+ insert_resource(&iomem_resource, &crashk_low_res);
+#endif
+}
+
static void __init reserve_crashkernel(void)
{
+ const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long total_mem;
unsigned long long crash_size, crash_base;
int ret;
@@ -525,8 +559,6 @@ static void __init reserve_crashkernel(void)
/* 0 means: find the address automatically */
if (crash_base <= 0) {
- const unsigned long long alignment = 16<<20; /* 16M */
-
/*
* kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
*/
@@ -537,6 +569,7 @@ static void __init reserve_crashkernel(void)
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
+
} else {
unsigned long long start;
@@ -558,6 +591,9 @@ static void __init reserve_crashkernel(void)
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
insert_resource(&iomem_resource, &crashk_res);
+
+ if (crash_base >= (1ULL<<32))
+ reserve_crashkernel_low();
}
#else
static void __init reserve_crashkernel(void)
@@ -608,7 +644,82 @@ static __init void reserve_ibft_region(void)
memblock_reserve(addr, size);
}
-static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
+static bool __init snb_gfx_workaround_needed(void)
+{
+#ifdef CONFIG_PCI
+ int i;
+ u16 vendor, devid;
+ static const __initconst u16 snb_ids[] = {
+ 0x0102,
+ 0x0112,
+ 0x0122,
+ 0x0106,
+ 0x0116,
+ 0x0126,
+ 0x010a,
+ };
+
+ /* Assume no if something weird is going on with PCI */
+ if (!early_pci_allowed())
+ return false;
+
+ vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID);
+ if (vendor != 0x8086)
+ return false;
+
+ devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID);
+ for (i = 0; i < ARRAY_SIZE(snb_ids); i++)
+ if (devid == snb_ids[i])
+ return true;
+#endif
+
+ return false;
+}
+
+/*
+ * Sandy Bridge graphics has trouble with certain ranges, exclude
+ * them from allocation.
+ */
+static void __init trim_snb_memory(void)
+{
+ static const __initconst unsigned long bad_pages[] = {
+ 0x20050000,
+ 0x20110000,
+ 0x20130000,
+ 0x20138000,
+ 0x40004000,
+ };
+ int i;
+
+ if (!snb_gfx_workaround_needed())
+ return;
+
+ printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n");
+
+ /*
+ * Reserve all memory below the 1 MB mark that has not
+ * already been reserved.
+ */
+ memblock_reserve(0, 1<<20);
+
+ for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
+ if (memblock_reserve(bad_pages[i], PAGE_SIZE))
+ printk(KERN_WARNING "failed to reserve 0x%08lx\n",
+ bad_pages[i]);
+ }
+}
+
+/*
+ * Here we put platform-specific memory range workarounds, i.e.
+ * memory known to be corrupt or otherwise in need to be reserved on
+ * specific platforms.
+ *
+ * If this gets used more widely it could use a real dispatch mechanism.
+ */
+static void __init trim_platform_memory_ranges(void)
+{
+ trim_snb_memory();
+}
static void __init trim_bios_range(void)
{
@@ -621,8 +732,7 @@ static void __init trim_bios_range(void)
* since some BIOSes are known to corrupt low memory. See the
* Kconfig help text for X86_RESERVE_LOW.
*/
- e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE),
- E820_RAM, E820_RESERVED);
+ e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
/*
* special case: Some BIOSen report the PC BIOS
@@ -630,9 +740,33 @@ static void __init trim_bios_range(void)
* take them out.
*/
e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
+
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
+/* called before trim_bios_range() to spare extra sanitize */
+static void __init e820_add_kernel_range(void)
+{
+ u64 start = __pa_symbol(_text);
+ u64 size = __pa_symbol(_end) - start;
+
+ /*
+ * Complain if .text .data and .bss are not marked as E820_RAM and
+ * attempt to fix it by adding the range. We may have a confused BIOS,
+ * or the user may have used memmap=exactmap or memmap=xxM$yyM to
+ * exclude kernel range. If we really are running on top non-RAM,
+ * we will crash later anyways.
+ */
+ if (e820_all_mapped(start, start + size, E820_RAM))
+ return;
+
+ pr_warn(".text .data .bss are not marked as E820_RAM!\n");
+ e820_remove_range(start, size, E820_RAM, 0);
+ e820_add_region(start, size, E820_RAM);
+}
+
+static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
+
static int __init parse_reservelow(char *p)
{
unsigned long long size;
@@ -655,6 +789,11 @@ static int __init parse_reservelow(char *p)
early_param("reservelow", parse_reservelow);
+static void __init trim_low_memory_range(void)
+{
+ memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
+}
+
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -670,6 +809,17 @@ early_param("reservelow", parse_reservelow);
void __init setup_arch(char **cmdline_p)
{
+ memblock_reserve(__pa_symbol(_text),
+ (unsigned long)__bss_stop - (unsigned long)_text);
+
+ early_reserve_initrd();
+
+ /*
+ * At this point everything still needed from the boot loader
+ * or BIOS or kernel text should be early reserved or marked not
+ * RAM in e820. All other memory is free game.
+ */
+
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
@@ -729,15 +879,15 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL32", 4)) {
- efi_enabled = 1;
- efi_64bit = false;
+ set_bit(EFI_BOOT, &x86_efi_facility);
} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL64", 4)) {
- efi_enabled = 1;
- efi_64bit = true;
+ set_bit(EFI_BOOT, &x86_efi_facility);
+ set_bit(EFI_64BIT, &x86_efi_facility);
}
- if (efi_enabled && efi_memblock_x86_reserve_range())
- efi_enabled = 0;
+
+ if (efi_enabled(EFI_BOOT))
+ efi_memblock_x86_reserve_range();
#endif
x86_init.oem.arch_setup();
@@ -757,12 +907,12 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = _brk_end;
- code_resource.start = virt_to_phys(_text);
- code_resource.end = virt_to_phys(_etext)-1;
- data_resource.start = virt_to_phys(_etext);
- data_resource.end = virt_to_phys(_edata)-1;
- bss_resource.start = virt_to_phys(&__bss_start);
- bss_resource.end = virt_to_phys(&__bss_stop)-1;
+ code_resource.start = __pa_symbol(_text);
+ code_resource.end = __pa_symbol(_etext)-1;
+ data_resource.start = __pa_symbol(_etext);
+ data_resource.end = __pa_symbol(_edata)-1;
+ bss_resource.start = __pa_symbol(__bss_start);
+ bss_resource.end = __pa_symbol(__bss_stop)-1;
#ifdef CONFIG_CMDLINE_BOOL
#ifdef CONFIG_CMDLINE_OVERRIDE
@@ -810,7 +960,7 @@ void __init setup_arch(char **cmdline_p)
finish_e820_parsing();
- if (efi_enabled)
+ if (efi_enabled(EFI_BOOT))
efi_init();
dmi_scan_machine();
@@ -828,6 +978,7 @@ void __init setup_arch(char **cmdline_p)
insert_resource(&iomem_resource, &data_resource);
insert_resource(&iomem_resource, &bss_resource);
+ e820_add_kernel_range();
trim_bios_range();
#ifdef CONFIG_X86_32
if (ppro_with_ram_bug()) {
@@ -877,6 +1028,8 @@ void __init setup_arch(char **cmdline_p)
reserve_ibft_region();
+ early_alloc_pgt_buf();
+
/*
* Need to conclude brk, before memblock_x86_fill()
* it could use memblock_find_in_range, could overlap with
@@ -886,14 +1039,14 @@ void __init setup_arch(char **cmdline_p)
cleanup_highmap();
- memblock.current_limit = get_max_mapped();
+ memblock.current_limit = ISA_END_ADDRESS;
memblock_x86_fill();
/*
* The EFI specification says that boot service code won't be called
* after ExitBootServices(). This is, in fact, a lie.
*/
- if (efi_enabled)
+ if (efi_enabled(EFI_MEMMAP))
efi_reserve_boot_services();
/* preallocate 4k for mptable mpc */
@@ -903,39 +1056,31 @@ void __init setup_arch(char **cmdline_p)
setup_bios_corruption_check();
#endif
+ /*
+ * In the memory hotplug case, the kernel needs info from SRAT to
+ * determine which memory is hotpluggable before allocating memory
+ * using memblock.
+ */
+ acpi_boot_table_init();
+ early_acpi_boot_init();
+ early_parse_srat();
+
+#ifdef CONFIG_X86_32
printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
(max_pfn_mapped<<PAGE_SHIFT) - 1);
+#endif
- setup_real_mode();
-
- init_gbpages();
-
- /* max_pfn_mapped is updated here */
- max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
- max_pfn_mapped = max_low_pfn_mapped;
+ reserve_real_mode();
-#ifdef CONFIG_X86_64
- if (max_pfn > max_low_pfn) {
- int i;
- unsigned long start, end;
- unsigned long start_pfn, end_pfn;
+ trim_platform_memory_ranges();
+ trim_low_memory_range();
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
- NULL) {
+ init_mem_mapping();
- end = PFN_PHYS(end_pfn);
- if (end <= (1UL<<32))
- continue;
+ early_trap_pf_init();
- start = PFN_PHYS(start_pfn);
- max_pfn_mapped = init_memory_mapping(
- max((1UL<<32), start), end);
- }
+ setup_real_mode();
- /* can we preseve max_low_pfn ?*/
- max_low_pfn = max_pfn;
- }
-#endif
memblock.current_limit = get_max_mapped();
dma_contiguous_reserve(0);
@@ -965,10 +1110,6 @@ void __init setup_arch(char **cmdline_p)
/*
* Parse the ACPI tables for possible boot-time SMP configuration.
*/
- acpi_boot_table_init();
-
- early_acpi_boot_init();
-
initmem_init();
memblock_find_dma_reserve();
@@ -1034,7 +1175,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
- if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+ if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
@@ -1051,14 +1192,14 @@ void __init setup_arch(char **cmdline_p)
register_refined_jiffies(CLOCK_TICK_RATE);
#ifdef CONFIG_EFI
- /* Once setup is done above, disable efi_enabled on mismatched
- * firmware/kernel archtectures since there is no support for
- * runtime services.
+ /* Once setup is done above, unmap the EFI memory map on
+ * mismatched firmware/kernel archtectures since there is no
+ * support for runtime services.
*/
- if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) {
+ if (efi_enabled(EFI_BOOT) &&
+ IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) {
pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
efi_unmap_memmap();
- efi_enabled = 0;
}
#endif
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index d6bf1f34a6e..69562992e45 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -278,7 +278,7 @@ static const struct {
};
static int
-__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
+__setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
struct sigframe __user *frame;
@@ -286,7 +286,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
int err = 0;
void __user *fpstate = NULL;
- frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+ frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
@@ -307,8 +307,8 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
else
restorer = &frame->retcode;
- if (ka->sa.sa_flags & SA_RESTORER)
- restorer = ka->sa.sa_restorer;
+ if (ksig->ka.sa.sa_flags & SA_RESTORER)
+ restorer = ksig->ka.sa.sa_restorer;
/* Set up to return from userspace. */
err |= __put_user(restorer, &frame->pretcode);
@@ -327,7 +327,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
- regs->ip = (unsigned long)ka->sa.sa_handler;
+ regs->ip = (unsigned long)ksig->ka.sa.sa_handler;
regs->ax = (unsigned long)sig;
regs->dx = 0;
regs->cx = 0;
@@ -340,7 +340,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
return 0;
}
-static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+static int __setup_rt_frame(int sig, struct ksignal *ksig,
sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
@@ -348,7 +348,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
int err = 0;
void __user *fpstate = NULL;
- frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+ frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
@@ -368,8 +368,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
- if (ka->sa.sa_flags & SA_RESTORER)
- restorer = ka->sa.sa_restorer;
+ if (ksig->ka.sa.sa_flags & SA_RESTORER)
+ restorer = ksig->ka.sa.sa_restorer;
put_user_ex(restorer, &frame->pretcode);
/*
@@ -382,7 +382,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
} put_user_catch(err);
- err |= copy_siginfo_to_user(&frame->info, info);
+ err |= copy_siginfo_to_user(&frame->info, &ksig->info);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -392,7 +392,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
- regs->ip = (unsigned long)ka->sa.sa_handler;
+ regs->ip = (unsigned long)ksig->ka.sa.sa_handler;
regs->ax = (unsigned long)sig;
regs->dx = (unsigned long)&frame->info;
regs->cx = (unsigned long)&frame->uc;
@@ -405,20 +405,20 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
return 0;
}
#else /* !CONFIG_X86_32 */
-static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+static int __setup_rt_frame(int sig, struct ksignal *ksig,
sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
void __user *fp = NULL;
int err = 0;
- frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp);
+ frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
- if (ka->sa.sa_flags & SA_SIGINFO) {
- if (copy_siginfo_to_user(&frame->info, info))
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
return -EFAULT;
}
@@ -434,8 +434,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
/* x86-64 should always use SA_RESTORER. */
- if (ka->sa.sa_flags & SA_RESTORER) {
- put_user_ex(ka->sa.sa_restorer, &frame->pretcode);
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ put_user_ex(ksig->ka.sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
err |= -EFAULT;
@@ -457,7 +457,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
next argument after the signal number on the stack. */
regs->si = (unsigned long)&frame->info;
regs->dx = (unsigned long)&frame->uc;
- regs->ip = (unsigned long) ka->sa.sa_handler;
+ regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
regs->sp = (unsigned long)frame;
@@ -469,8 +469,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
}
#endif /* CONFIG_X86_32 */
-static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
- siginfo_t *info, compat_sigset_t *set,
+static int x32_setup_rt_frame(struct ksignal *ksig,
+ compat_sigset_t *set,
struct pt_regs *regs)
{
#ifdef CONFIG_X86_X32_ABI
@@ -479,13 +479,13 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
int err = 0;
void __user *fpstate = NULL;
- frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+ frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
- if (ka->sa.sa_flags & SA_SIGINFO) {
- if (copy_siginfo_to_user32(&frame->info, info))
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+ if (copy_siginfo_to_user32(&frame->info, &ksig->info))
return -EFAULT;
}
@@ -499,8 +499,8 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
- if (ka->sa.sa_flags & SA_RESTORER) {
- restorer = ka->sa.sa_restorer;
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ restorer = ksig->ka.sa.sa_restorer;
} else {
/* could use a vstub here */
restorer = NULL;
@@ -518,10 +518,10 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
/* Set up registers for signal handler */
regs->sp = (unsigned long) frame;
- regs->ip = (unsigned long) ka->sa.sa_handler;
+ regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
/* We use the x32 calling convention here... */
- regs->di = sig;
+ regs->di = ksig->sig;
regs->si = (unsigned long) &frame->info;
regs->dx = (unsigned long) &frame->uc;
@@ -535,70 +535,13 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
return 0;
}
-#ifdef CONFIG_X86_32
-/*
- * Atomically swap in the new signal mask, and wait for a signal.
- */
-asmlinkage int
-sys_sigsuspend(int history0, int history1, old_sigset_t mask)
-{
- sigset_t blocked;
- siginitset(&blocked, mask);
- return sigsuspend(&blocked);
-}
-
-asmlinkage int
-sys_sigaction(int sig, const struct old_sigaction __user *act,
- struct old_sigaction __user *oact)
-{
- struct k_sigaction new_ka, old_ka;
- int ret = 0;
-
- if (act) {
- old_sigset_t mask;
-
- if (!access_ok(VERIFY_READ, act, sizeof(*act)))
- return -EFAULT;
-
- get_user_try {
- get_user_ex(new_ka.sa.sa_handler, &act->sa_handler);
- get_user_ex(new_ka.sa.sa_flags, &act->sa_flags);
- get_user_ex(mask, &act->sa_mask);
- get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer);
- } get_user_catch(ret);
-
- if (ret)
- return -EFAULT;
- siginitset(&new_ka.sa.sa_mask, mask);
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
- return -EFAULT;
-
- put_user_try {
- put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler);
- put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags);
- put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
- put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer);
- } put_user_catch(ret);
-
- if (ret)
- return -EFAULT;
- }
-
- return ret;
-}
-#endif /* CONFIG_X86_32 */
-
/*
* Do a signal return; undo the signal stack.
*/
#ifdef CONFIG_X86_32
-unsigned long sys_sigreturn(struct pt_regs *regs)
+unsigned long sys_sigreturn(void)
{
+ struct pt_regs *regs = current_pt_regs();
struct sigframe __user *frame;
unsigned long ax;
sigset_t set;
@@ -625,8 +568,9 @@ badframe:
}
#endif /* CONFIG_X86_32 */
-long sys_rt_sigreturn(struct pt_regs *regs)
+long sys_rt_sigreturn(void)
{
+ struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
unsigned long ax;
sigset_t set;
@@ -667,30 +611,29 @@ static int signr_convert(int sig)
}
static int
-setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- struct pt_regs *regs)
+setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
{
- int usig = signr_convert(sig);
+ int usig = signr_convert(ksig->sig);
sigset_t *set = sigmask_to_save();
compat_sigset_t *cset = (compat_sigset_t *) set;
/* Set up the stack frame */
if (is_ia32_frame()) {
- if (ka->sa.sa_flags & SA_SIGINFO)
- return ia32_setup_rt_frame(usig, ka, info, cset, regs);
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+ return ia32_setup_rt_frame(usig, ksig, cset, regs);
else
- return ia32_setup_frame(usig, ka, cset, regs);
+ return ia32_setup_frame(usig, ksig, cset, regs);
} else if (is_x32_frame()) {
- return x32_setup_rt_frame(usig, ka, info, cset, regs);
+ return x32_setup_rt_frame(ksig, cset, regs);
} else {
- return __setup_rt_frame(sig, ka, info, set, regs);
+ return __setup_rt_frame(ksig->sig, ksig, set, regs);
}
}
static void
-handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
- struct pt_regs *regs)
+handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
+ bool failed;
/* Are we from a system call? */
if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
@@ -701,7 +644,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
break;
case -ERESTARTSYS:
- if (!(ka->sa.sa_flags & SA_RESTART)) {
+ if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
regs->ax = -EINTR;
break;
}
@@ -721,26 +664,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
- if (setup_rt_frame(sig, ka, info, regs) < 0) {
- force_sigsegv(sig, current);
- return;
+ failed = (setup_rt_frame(ksig, regs) < 0);
+ if (!failed) {
+ /*
+ * Clear the direction flag as per the ABI for function entry.
+ */
+ regs->flags &= ~X86_EFLAGS_DF;
+ /*
+ * Clear TF when entering the signal handler, but
+ * notify any tracer that was single-stepping it.
+ * The tracer may want to single-step inside the
+ * handler too.
+ */
+ regs->flags &= ~X86_EFLAGS_TF;
}
-
- /*
- * Clear the direction flag as per the ABI for function entry.
- */
- regs->flags &= ~X86_EFLAGS_DF;
-
- /*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- regs->flags &= ~X86_EFLAGS_TF;
-
- signal_delivered(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLESTEP));
+ signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
}
#ifdef CONFIG_X86_32
@@ -757,14 +695,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
*/
static void do_signal(struct pt_regs *regs)
{
- struct k_sigaction ka;
- siginfo_t info;
- int signr;
+ struct ksignal ksig;
- signr = get_signal_to_deliver(&info, &ka, regs, NULL);
- if (signr > 0) {
+ if (get_signal(&ksig)) {
/* Whee! Actually deliver the signal. */
- handle_signal(signr, &info, &ka, regs);
+ handle_signal(&ksig, regs);
return;
}
@@ -843,8 +778,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
}
#ifdef CONFIG_X86_X32_ABI
-asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
+asmlinkage long sys32_x32_rt_sigreturn(void)
{
+ struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame;
sigset_t set;
unsigned long ax;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ed0fe385289..a6ceaedc396 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1369,7 +1369,7 @@ static inline void mwait_play_dead(void)
void *mwait_ptr;
struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
- if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
+ if (!this_cpu_has(X86_FEATURE_MWAIT))
return;
if (!this_cpu_has(X86_FEATURE_CLFLSH))
return;
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index cd3b2438a98..9b4d51d0c0d 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -165,10 +165,11 @@ void set_task_blockstep(struct task_struct *task, bool on)
* Ensure irq/preemption can't change debugctl in between.
* Note also that both TIF_BLOCKSTEP and debugctl should
* be changed atomically wrt preemption.
- * FIXME: this means that set/clear TIF_BLOCKSTEP is simply
- * wrong if task != current, SIGKILL can wakeup the stopped
- * tracee and set/clear can play with the running task, this
- * can confuse the next __switch_to_xtra().
+ *
+ * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
+ * task is current or it can't be running, otherwise we can race
+ * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
+ * PTRACE_KILL is not safe.
*/
local_irq_disable();
debugctl = get_debugctlmsr();
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 97ef74b88e0..dbded5aedb8 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -157,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
if (flags & MAP_FIXED)
return addr;
- /* for MAP_32BIT mappings we force the legact mmap base */
+ /* for MAP_32BIT mappings we force the legacy mmap base */
if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
goto bottomup;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ecffca11f4e..68bda7a8415 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -688,10 +688,19 @@ void __init early_trap_init(void)
set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
/* int3 can be called from all */
set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+#ifdef CONFIG_X86_32
set_intr_gate(X86_TRAP_PF, &page_fault);
+#endif
load_idt(&idt_descr);
}
+void __init early_trap_pf_init(void)
+{
+#ifdef CONFIG_X86_64
+ set_intr_gate(X86_TRAP_PF, &page_fault);
+#endif
+}
+
void __init trap_init(void)
{
int i;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 06ccb5073a3..4b9ea101fe3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
ns_now = __cycles_2_ns(tsc_now);
if (cpu_khz) {
- *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+ *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
+ cpu_khz / 2) / cpu_khz;
*offset = ns_now - mult_frac(tsc_now, *scale,
(1UL << CYC2NS_SCALE_FACTOR));
}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index c71025b6746..0ba4cfb4f41 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -680,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
if (auprobe->insn[i] == 0x66)
continue;
- if (auprobe->insn[i] == 0x90)
+ if (auprobe->insn[i] == 0x90) {
+ regs->ip += i + 1;
return true;
+ }
break;
}
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 1dfe69cc78a..1cf5766dde1 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -202,7 +202,7 @@ out:
static int do_vm86_irq_handling(int subfunction, int irqnumber);
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
-int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs)
+int sys_vm86old(struct vm86_struct __user *v86)
{
struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space.
@@ -222,7 +222,7 @@ int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs)
if (tmp)
goto out;
memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
- info.regs32 = regs;
+ info.regs32 = current_pt_regs();
tsk->thread.vm86_info = v86;
do_sys_vm86(&info, tsk);
ret = 0; /* we never return here */
@@ -231,7 +231,7 @@ out:
}
-int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs)
+int sys_vm86(unsigned long cmd, unsigned long arg)
{
struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space.
@@ -272,7 +272,7 @@ int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs)
ret = -EFAULT;
if (tmp)
goto out;
- info.regs32 = regs;
+ info.regs32 = current_pt_regs();
info.vm86plus.is_vm86pus = 1;
tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
do_sys_vm86(&info, tsk);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1330dd10295..b014d9414d0 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -59,6 +59,9 @@ EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(memmove);
+#ifndef CONFIG_DEBUG_VIRTUAL
+EXPORT_SYMBOL(phys_base);
+#endif
EXPORT_SYMBOL(empty_zero_page);
#ifndef CONFIG_PARAVIRT
EXPORT_SYMBOL(native_load_gs_index);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 7a3d075a814..45a14dbbdda 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -19,6 +19,7 @@
#include <asm/time.h>
#include <asm/irq.h>
#include <asm/io_apic.h>
+#include <asm/hpet.h>
#include <asm/pat.h>
#include <asm/tsc.h>
#include <asm/iommu.h>
@@ -62,10 +63,6 @@ struct x86_init_ops x86_init __initdata = {
.banner = default_banner,
},
- .mapping = {
- .pagetable_reserve = native_pagetable_reserve,
- },
-
.paging = {
.pagetable_init = native_pagetable_init,
},
@@ -111,15 +108,22 @@ struct x86_platform_ops x86_platform = {
EXPORT_SYMBOL_GPL(x86_platform);
struct x86_msi_ops x86_msi = {
- .setup_msi_irqs = native_setup_msi_irqs,
- .teardown_msi_irq = native_teardown_msi_irq,
- .teardown_msi_irqs = default_teardown_msi_irqs,
- .restore_msi_irqs = default_restore_msi_irqs,
+ .setup_msi_irqs = native_setup_msi_irqs,
+ .compose_msi_msg = native_compose_msi_msg,
+ .teardown_msi_irq = native_teardown_msi_irq,
+ .teardown_msi_irqs = default_teardown_msi_irqs,
+ .restore_msi_irqs = default_restore_msi_irqs,
+ .setup_hpet_msi = default_setup_hpet_msi,
};
struct x86_io_apic_ops x86_io_apic_ops = {
- .init = native_io_apic_init_mappings,
- .read = native_io_apic_read,
- .write = native_io_apic_write,
- .modify = native_io_apic_modify,
+ .init = native_io_apic_init_mappings,
+ .read = native_io_apic_read,
+ .write = native_io_apic_write,
+ .modify = native_io_apic_modify,
+ .disable = native_disable_io_apic,
+ .print_entries = native_io_apic_print_entries,
+ .set_affinity = native_ioapic_set_affinity,
+ .setup_entry = native_setup_ioapic_entry,
+ .eoi_ioapic_pin = native_eoi_ioapic_pin,
};
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a27e7637110..a335cc6cde7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -24,6 +24,7 @@
#include "kvm_cache_regs.h"
#include <linux/module.h>
#include <asm/kvm_emulate.h>
+#include <linux/stringify.h>
#include "x86.h"
#include "tss.h"
@@ -43,7 +44,7 @@
#define OpCL 9ull /* CL register (for shifts) */
#define OpImmByte 10ull /* 8-bit sign extended immediate */
#define OpOne 11ull /* Implied 1 */
-#define OpImm 12ull /* Sign extended immediate */
+#define OpImm 12ull /* Sign extended up to 32-bit immediate */
#define OpMem16 13ull /* Memory operand (16-bit). */
#define OpMem32 14ull /* Memory operand (32-bit). */
#define OpImmU 15ull /* Immediate operand, zero extended */
@@ -58,6 +59,7 @@
#define OpFS 24ull /* FS */
#define OpGS 25ull /* GS */
#define OpMem8 26ull /* 8-bit zero extended memory operand */
+#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
#define OpBits 5 /* Width of operand field */
#define OpMask ((1ull << OpBits) - 1)
@@ -101,6 +103,7 @@
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc (OpAcc << SrcShift)
#define SrcImmU16 (OpImmU16 << SrcShift)
+#define SrcImm64 (OpImm64 << SrcShift)
#define SrcDX (OpDX << SrcShift)
#define SrcMem8 (OpMem8 << SrcShift)
#define SrcMask (OpMask << SrcShift)
@@ -113,6 +116,7 @@
#define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
+#define Escape (5<<15) /* Escape to coprocessor instruction */
#define Sse (1<<18) /* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM (1<<19)
@@ -146,6 +150,8 @@
#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
+#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
+#define NoWrite ((u64)1 << 45) /* No writeback */
#define X2(x...) x, x
#define X3(x...) X2(x), x
@@ -156,6 +162,27 @@
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)
+#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
+#define FASTOP_SIZE 8
+
+/*
+ * fastop functions have a special calling convention:
+ *
+ * dst: [rdx]:rax (in/out)
+ * src: rbx (in/out)
+ * src2: rcx (in)
+ * flags: rflags (in/out)
+ *
+ * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
+ * different operand sizes can be reached by calculation, rather than a jump
+ * table (which would be bigger than the code).
+ *
+ * fastop functions are declared as taking a never-defined fastop parameter,
+ * so they can't be called from C directly.
+ */
+
+struct fastop;
+
struct opcode {
u64 flags : 56;
u64 intercept : 8;
@@ -164,6 +191,8 @@ struct opcode {
const struct opcode *group;
const struct group_dual *gdual;
const struct gprefix *gprefix;
+ const struct escape *esc;
+ void (*fastop)(struct fastop *fake);
} u;
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};
@@ -180,6 +209,11 @@ struct gprefix {
struct opcode pfx_f3;
};
+struct escape {
+ struct opcode op[8];
+ struct opcode high[64];
+};
+
/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
@@ -407,6 +441,97 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
} \
} while (0)
+static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
+
+#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
+#define FOP_RET "ret \n\t"
+
+#define FOP_START(op) \
+ extern void em_##op(struct fastop *fake); \
+ asm(".pushsection .text, \"ax\" \n\t" \
+ ".global em_" #op " \n\t" \
+ FOP_ALIGN \
+ "em_" #op ": \n\t"
+
+#define FOP_END \
+ ".popsection")
+
+#define FOPNOP() FOP_ALIGN FOP_RET
+
+#define FOP1E(op, dst) \
+ FOP_ALIGN #op " %" #dst " \n\t" FOP_RET
+
+#define FASTOP1(op) \
+ FOP_START(op) \
+ FOP1E(op##b, al) \
+ FOP1E(op##w, ax) \
+ FOP1E(op##l, eax) \
+ ON64(FOP1E(op##q, rax)) \
+ FOP_END
+
+#define FOP2E(op, dst, src) \
+ FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET
+
+#define FASTOP2(op) \
+ FOP_START(op) \
+ FOP2E(op##b, al, bl) \
+ FOP2E(op##w, ax, bx) \
+ FOP2E(op##l, eax, ebx) \
+ ON64(FOP2E(op##q, rax, rbx)) \
+ FOP_END
+
+/* 2 operand, word only */
+#define FASTOP2W(op) \
+ FOP_START(op) \
+ FOPNOP() \
+ FOP2E(op##w, ax, bx) \
+ FOP2E(op##l, eax, ebx) \
+ ON64(FOP2E(op##q, rax, rbx)) \
+ FOP_END
+
+/* 2 operand, src is CL */
+#define FASTOP2CL(op) \
+ FOP_START(op) \
+ FOP2E(op##b, al, cl) \
+ FOP2E(op##w, ax, cl) \
+ FOP2E(op##l, eax, cl) \
+ ON64(FOP2E(op##q, rax, cl)) \
+ FOP_END
+
+#define FOP3E(op, dst, src, src2) \
+ FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
+
+/* 3-operand, word-only, src2=cl */
+#define FASTOP3WCL(op) \
+ FOP_START(op) \
+ FOPNOP() \
+ FOP3E(op##w, ax, bx, cl) \
+ FOP3E(op##l, eax, ebx, cl) \
+ ON64(FOP3E(op##q, rax, rbx, cl)) \
+ FOP_END
+
+/* Special case for SETcc - 1 instruction per cc */
+#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"
+
+FOP_START(setcc)
+FOP_SETCC(seto)
+FOP_SETCC(setno)
+FOP_SETCC(setc)
+FOP_SETCC(setnc)
+FOP_SETCC(setz)
+FOP_SETCC(setnz)
+FOP_SETCC(setbe)
+FOP_SETCC(setnbe)
+FOP_SETCC(sets)
+FOP_SETCC(setns)
+FOP_SETCC(setp)
+FOP_SETCC(setnp)
+FOP_SETCC(setl)
+FOP_SETCC(setnl)
+FOP_SETCC(setle)
+FOP_SETCC(setnle)
+FOP_END;
+
#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \
do { \
unsigned long _tmp; \
@@ -663,7 +788,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
ulong la;
u32 lim;
u16 sel;
- unsigned cpl, rpl;
+ unsigned cpl;
la = seg_base(ctxt, addr.seg) + addr.ea;
switch (ctxt->mode) {
@@ -697,11 +822,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
goto bad;
}
cpl = ctxt->ops->cpl(ctxt);
- if (ctxt->mode == X86EMUL_MODE_REAL)
- rpl = 0;
- else
- rpl = sel & 3;
- cpl = max(cpl, rpl);
if (!(desc.type & 8)) {
/* data segment */
if (cpl > desc.dpl)
@@ -852,39 +972,50 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
return rc;
}
-static int test_cc(unsigned int condition, unsigned int flags)
-{
- int rc = 0;
-
- switch ((condition & 15) >> 1) {
- case 0: /* o */
- rc |= (flags & EFLG_OF);
- break;
- case 1: /* b/c/nae */
- rc |= (flags & EFLG_CF);
- break;
- case 2: /* z/e */
- rc |= (flags & EFLG_ZF);
- break;
- case 3: /* be/na */
- rc |= (flags & (EFLG_CF|EFLG_ZF));
- break;
- case 4: /* s */
- rc |= (flags & EFLG_SF);
- break;
- case 5: /* p/pe */
- rc |= (flags & EFLG_PF);
- break;
- case 7: /* le/ng */
- rc |= (flags & EFLG_ZF);
- /* fall through */
- case 6: /* l/nge */
- rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
- break;
- }
-
- /* Odd condition identifiers (lsb == 1) have inverted sense. */
- return (!!rc ^ (condition & 1));
+FASTOP2(add);
+FASTOP2(or);
+FASTOP2(adc);
+FASTOP2(sbb);
+FASTOP2(and);
+FASTOP2(sub);
+FASTOP2(xor);
+FASTOP2(cmp);
+FASTOP2(test);
+
+FASTOP3WCL(shld);
+FASTOP3WCL(shrd);
+
+FASTOP2W(imul);
+
+FASTOP1(not);
+FASTOP1(neg);
+FASTOP1(inc);
+FASTOP1(dec);
+
+FASTOP2CL(rol);
+FASTOP2CL(ror);
+FASTOP2CL(rcl);
+FASTOP2CL(rcr);
+FASTOP2CL(shl);
+FASTOP2CL(shr);
+FASTOP2CL(sar);
+
+FASTOP2W(bsf);
+FASTOP2W(bsr);
+FASTOP2W(bt);
+FASTOP2W(bts);
+FASTOP2W(btr);
+FASTOP2W(btc);
+
+static u8 test_cc(unsigned int condition, unsigned long flags)
+{
+ u8 rc;
+ void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
+
+ flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
+ asm("push %[flags]; popf; call *%[fastop]"
+ : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
+ return rc;
}
static void fetch_register_operand(struct operand *op)
@@ -994,6 +1125,53 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
ctxt->ops->put_fpu(ctxt);
}
+static int em_fninit(struct x86_emulate_ctxt *ctxt)
+{
+ if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
+ return emulate_nm(ctxt);
+
+ ctxt->ops->get_fpu(ctxt);
+ asm volatile("fninit");
+ ctxt->ops->put_fpu(ctxt);
+ return X86EMUL_CONTINUE;
+}
+
+static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
+{
+ u16 fcw;
+
+ if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
+ return emulate_nm(ctxt);
+
+ ctxt->ops->get_fpu(ctxt);
+ asm volatile("fnstcw %0": "+m"(fcw));
+ ctxt->ops->put_fpu(ctxt);
+
+ /* force 2 byte destination */
+ ctxt->dst.bytes = 2;
+ ctxt->dst.val = fcw;
+
+ return X86EMUL_CONTINUE;
+}
+
+static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
+{
+ u16 fsw;
+
+ if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
+ return emulate_nm(ctxt);
+
+ ctxt->ops->get_fpu(ctxt);
+ asm volatile("fnstsw %0": "+m"(fsw));
+ ctxt->ops->put_fpu(ctxt);
+
+ /* force 2 byte destination */
+ ctxt->dst.bytes = 2;
+ ctxt->dst.val = fsw;
+
+ return X86EMUL_CONTINUE;
+}
+
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
struct operand *op)
{
@@ -1534,6 +1712,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
{
int rc;
+ if (ctxt->d & NoWrite)
+ return X86EMUL_CONTINUE;
+
switch (ctxt->dst.type) {
case OP_REG:
write_register_operand(&ctxt->dst);
@@ -1918,47 +2099,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
-static int em_grp2(struct x86_emulate_ctxt *ctxt)
-{
- switch (ctxt->modrm_reg) {
- case 0: /* rol */
- emulate_2op_SrcB(ctxt, "rol");
- break;
- case 1: /* ror */
- emulate_2op_SrcB(ctxt, "ror");
- break;
- case 2: /* rcl */
- emulate_2op_SrcB(ctxt, "rcl");
- break;
- case 3: /* rcr */
- emulate_2op_SrcB(ctxt, "rcr");
- break;
- case 4: /* sal/shl */
- case 6: /* sal/shl */
- emulate_2op_SrcB(ctxt, "sal");
- break;
- case 5: /* shr */
- emulate_2op_SrcB(ctxt, "shr");
- break;
- case 7: /* sar */
- emulate_2op_SrcB(ctxt, "sar");
- break;
- }
- return X86EMUL_CONTINUE;
-}
-
-static int em_not(struct x86_emulate_ctxt *ctxt)
-{
- ctxt->dst.val = ~ctxt->dst.val;
- return X86EMUL_CONTINUE;
-}
-
-static int em_neg(struct x86_emulate_ctxt *ctxt)
-{
- emulate_1op(ctxt, "neg");
- return X86EMUL_CONTINUE;
-}
-
static int em_mul_ex(struct x86_emulate_ctxt *ctxt)
{
u8 ex = 0;
@@ -2000,12 +2140,6 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
int rc = X86EMUL_CONTINUE;
switch (ctxt->modrm_reg) {
- case 0: /* inc */
- emulate_1op(ctxt, "inc");
- break;
- case 1: /* dec */
- emulate_1op(ctxt, "dec");
- break;
case 2: /* call near abs */ {
long int old_eip;
old_eip = ctxt->_eip;
@@ -2075,7 +2209,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
/* Save real source value, then compare EAX against destination. */
ctxt->src.orig_val = ctxt->src.val;
ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
- emulate_2op_SrcV(ctxt, "cmp");
+ fastop(ctxt, em_cmp);
if (ctxt->eflags & EFLG_ZF) {
/* Success: write back to memory. */
@@ -2843,7 +2977,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
ctxt->src.type = OP_IMM;
ctxt->src.val = 0;
ctxt->src.bytes = 1;
- emulate_2op_SrcV(ctxt, "or");
+ fastop(ctxt, em_or);
ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
if (cf)
ctxt->eflags |= X86_EFLAGS_CF;
@@ -2852,6 +2986,24 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int em_aad(struct x86_emulate_ctxt *ctxt)
+{
+ u8 al = ctxt->dst.val & 0xff;
+ u8 ah = (ctxt->dst.val >> 8) & 0xff;
+
+ al = (al + (ah * ctxt->src.val)) & 0xff;
+
+ ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
+
+ /* Set PF, ZF, SF */
+ ctxt->src.type = OP_IMM;
+ ctxt->src.val = 0;
+ ctxt->src.bytes = 1;
+ fastop(ctxt, em_or);
+
+ return X86EMUL_CONTINUE;
+}
+
static int em_call(struct x86_emulate_ctxt *ctxt)
{
long rel = ctxt->src.val;
@@ -2900,64 +3052,6 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
-static int em_add(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV(ctxt, "add");
- return X86EMUL_CONTINUE;
-}
-
-static int em_or(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV(ctxt, "or");
- return X86EMUL_CONTINUE;
-}
-
-static int em_adc(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV(ctxt, "adc");
- return X86EMUL_CONTINUE;
-}
-
-static int em_sbb(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV(ctxt, "sbb");
- return X86EMUL_CONTINUE;
-}
-
-static int em_and(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV(ctxt, "and");
- return X86EMUL_CONTINUE;
-}
-
-static int em_sub(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV(ctxt, "sub");
- return X86EMUL_CONTINUE;
-}
-
-static int em_xor(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV(ctxt, "xor");
- return X86EMUL_CONTINUE;
-}
-
-static int em_cmp(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV(ctxt, "cmp");
- /* Disable writeback. */
- ctxt->dst.type = OP_NONE;
- return X86EMUL_CONTINUE;
-}
-
-static int em_test(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV(ctxt, "test");
- /* Disable writeback. */
- ctxt->dst.type = OP_NONE;
- return X86EMUL_CONTINUE;
-}
-
static int em_xchg(struct x86_emulate_ctxt *ctxt)
{
/* Write back the register source. */
@@ -2970,16 +3064,10 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
-static int em_imul(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV_nobyte(ctxt, "imul");
- return X86EMUL_CONTINUE;
-}
-
static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
{
ctxt->dst.val = ctxt->src2.val;
- return em_imul(ctxt);
+ return fastop(ctxt, em_imul);
}
static int em_cwd(struct x86_emulate_ctxt *ctxt)
@@ -3300,47 +3388,6 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
-static int em_bt(struct x86_emulate_ctxt *ctxt)
-{
- /* Disable writeback. */
- ctxt->dst.type = OP_NONE;
- /* only subword offset */
- ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
-
- emulate_2op_SrcV_nobyte(ctxt, "bt");
- return X86EMUL_CONTINUE;
-}
-
-static int em_bts(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV_nobyte(ctxt, "bts");
- return X86EMUL_CONTINUE;
-}
-
-static int em_btr(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV_nobyte(ctxt, "btr");
- return X86EMUL_CONTINUE;
-}
-
-static int em_btc(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV_nobyte(ctxt, "btc");
- return X86EMUL_CONTINUE;
-}
-
-static int em_bsf(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV_nobyte(ctxt, "bsf");
- return X86EMUL_CONTINUE;
-}
-
-static int em_bsr(struct x86_emulate_ctxt *ctxt)
-{
- emulate_2op_SrcV_nobyte(ctxt, "bsr");
- return X86EMUL_CONTINUE;
-}
-
static int em_cpuid(struct x86_emulate_ctxt *ctxt)
{
u32 eax, ebx, ecx, edx;
@@ -3572,7 +3619,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
+#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
+#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
#define II(_f, _e, _i) \
{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
#define IIP(_f, _e, _i, _p) \
@@ -3583,12 +3632,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
#define D2bv(_f) D((_f) | ByteOp), D(_f)
#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
+#define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
#define I2bvIP(_f, _e, _i, _p) \
IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
-#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \
- I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
- I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
+#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
+ F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
+ F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
static const struct opcode group7_rm1[] = {
DI(SrcNone | Priv, monitor),
@@ -3614,25 +3664,36 @@ static const struct opcode group7_rm7[] = {
};
static const struct opcode group1[] = {
- I(Lock, em_add),
- I(Lock | PageTable, em_or),
- I(Lock, em_adc),
- I(Lock, em_sbb),
- I(Lock | PageTable, em_and),
- I(Lock, em_sub),
- I(Lock, em_xor),
- I(0, em_cmp),
+ F(Lock, em_add),
+ F(Lock | PageTable, em_or),
+ F(Lock, em_adc),
+ F(Lock, em_sbb),
+ F(Lock | PageTable, em_and),
+ F(Lock, em_sub),
+ F(Lock, em_xor),
+ F(NoWrite, em_cmp),
};
static const struct opcode group1A[] = {
I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
};
+static const struct opcode group2[] = {
+ F(DstMem | ModRM, em_rol),
+ F(DstMem | ModRM, em_ror),
+ F(DstMem | ModRM, em_rcl),
+ F(DstMem | ModRM, em_rcr),
+ F(DstMem | ModRM, em_shl),
+ F(DstMem | ModRM, em_shr),
+ F(DstMem | ModRM, em_shl),
+ F(DstMem | ModRM, em_sar),
+};
+
static const struct opcode group3[] = {
- I(DstMem | SrcImm, em_test),
- I(DstMem | SrcImm, em_test),
- I(DstMem | SrcNone | Lock, em_not),
- I(DstMem | SrcNone | Lock, em_neg),
+ F(DstMem | SrcImm | NoWrite, em_test),
+ F(DstMem | SrcImm | NoWrite, em_test),
+ F(DstMem | SrcNone | Lock, em_not),
+ F(DstMem | SrcNone | Lock, em_neg),
I(SrcMem, em_mul_ex),
I(SrcMem, em_imul_ex),
I(SrcMem, em_div_ex),
@@ -3640,14 +3701,14 @@ static const struct opcode group3[] = {
};
static const struct opcode group4[] = {
- I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
- I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
+ F(ByteOp | DstMem | SrcNone | Lock, em_inc),
+ F(ByteOp | DstMem | SrcNone | Lock, em_dec),
N, N, N, N, N, N,
};
static const struct opcode group5[] = {
- I(DstMem | SrcNone | Lock, em_grp45),
- I(DstMem | SrcNone | Lock, em_grp45),
+ F(DstMem | SrcNone | Lock, em_inc),
+ F(DstMem | SrcNone | Lock, em_dec),
I(SrcMem | Stack, em_grp45),
I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
I(SrcMem | Stack, em_grp45),
@@ -3682,10 +3743,10 @@ static const struct group_dual group7 = { {
static const struct opcode group8[] = {
N, N, N, N,
- I(DstMem | SrcImmByte, em_bt),
- I(DstMem | SrcImmByte | Lock | PageTable, em_bts),
- I(DstMem | SrcImmByte | Lock, em_btr),
- I(DstMem | SrcImmByte | Lock | PageTable, em_btc),
+ F(DstMem | SrcImmByte | NoWrite, em_bt),
+ F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
+ F(DstMem | SrcImmByte | Lock, em_btr),
+ F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
};
static const struct group_dual group9 = { {
@@ -3707,33 +3768,96 @@ static const struct gprefix pfx_vmovntpx = {
I(0, em_mov), N, N, N,
};
+static const struct escape escape_d9 = { {
+ N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
+}, {
+ /* 0xC0 - 0xC7 */
+ N, N, N, N, N, N, N, N,
+ /* 0xC8 - 0xCF */
+ N, N, N, N, N, N, N, N,
+ /* 0xD0 - 0xC7 */
+ N, N, N, N, N, N, N, N,
+ /* 0xD8 - 0xDF */
+ N, N, N, N, N, N, N, N,
+ /* 0xE0 - 0xE7 */
+ N, N, N, N, N, N, N, N,
+ /* 0xE8 - 0xEF */
+ N, N, N, N, N, N, N, N,
+ /* 0xF0 - 0xF7 */
+ N, N, N, N, N, N, N, N,
+ /* 0xF8 - 0xFF */
+ N, N, N, N, N, N, N, N,
+} };
+
+static const struct escape escape_db = { {
+ N, N, N, N, N, N, N, N,
+}, {
+ /* 0xC0 - 0xC7 */
+ N, N, N, N, N, N, N, N,
+ /* 0xC8 - 0xCF */
+ N, N, N, N, N, N, N, N,
+ /* 0xD0 - 0xC7 */
+ N, N, N, N, N, N, N, N,
+ /* 0xD8 - 0xDF */
+ N, N, N, N, N, N, N, N,
+ /* 0xE0 - 0xE7 */
+ N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
+ /* 0xE8 - 0xEF */
+ N, N, N, N, N, N, N, N,
+ /* 0xF0 - 0xF7 */
+ N, N, N, N, N, N, N, N,
+ /* 0xF8 - 0xFF */
+ N, N, N, N, N, N, N, N,
+} };
+
+static const struct escape escape_dd = { {
+ N, N, N, N, N, N, N, I(DstMem, em_fnstsw),
+}, {
+ /* 0xC0 - 0xC7 */
+ N, N, N, N, N, N, N, N,
+ /* 0xC8 - 0xCF */
+ N, N, N, N, N, N, N, N,
+ /* 0xD0 - 0xC7 */
+ N, N, N, N, N, N, N, N,
+ /* 0xD8 - 0xDF */
+ N, N, N, N, N, N, N, N,
+ /* 0xE0 - 0xE7 */
+ N, N, N, N, N, N, N, N,
+ /* 0xE8 - 0xEF */
+ N, N, N, N, N, N, N, N,
+ /* 0xF0 - 0xF7 */
+ N, N, N, N, N, N, N, N,
+ /* 0xF8 - 0xFF */
+ N, N, N, N, N, N, N, N,
+} };
+
static const struct opcode opcode_table[256] = {
/* 0x00 - 0x07 */
- I6ALU(Lock, em_add),
+ F6ALU(Lock, em_add),
I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
/* 0x08 - 0x0F */
- I6ALU(Lock | PageTable, em_or),
+ F6ALU(Lock | PageTable, em_or),
I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
N,
/* 0x10 - 0x17 */
- I6ALU(Lock, em_adc),
+ F6ALU(Lock, em_adc),
I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
/* 0x18 - 0x1F */
- I6ALU(Lock, em_sbb),
+ F6ALU(Lock, em_sbb),
I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
/* 0x20 - 0x27 */
- I6ALU(Lock | PageTable, em_and), N, N,
+ F6ALU(Lock | PageTable, em_and), N, N,
/* 0x28 - 0x2F */
- I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
+ F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
/* 0x30 - 0x37 */
- I6ALU(Lock, em_xor), N, N,
+ F6ALU(Lock, em_xor), N, N,
/* 0x38 - 0x3F */
- I6ALU(0, em_cmp), N, N,
+ F6ALU(NoWrite, em_cmp), N, N,
/* 0x40 - 0x4F */
- X16(D(DstReg)),
+ X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
/* 0x50 - 0x57 */
X8(I(SrcReg | Stack, em_push)),
/* 0x58 - 0x5F */
@@ -3757,7 +3881,7 @@ static const struct opcode opcode_table[256] = {
G(DstMem | SrcImm, group1),
G(ByteOp | DstMem | SrcImm | No64, group1),
G(DstMem | SrcImmByte, group1),
- I2bv(DstMem | SrcReg | ModRM, em_test),
+ F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
/* 0x88 - 0x8F */
I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
@@ -3777,18 +3901,18 @@ static const struct opcode opcode_table[256] = {
I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
I2bv(SrcSI | DstDI | Mov | String, em_mov),
- I2bv(SrcSI | DstDI | String, em_cmp),
+ F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),
/* 0xA8 - 0xAF */
- I2bv(DstAcc | SrcImm, em_test),
+ F2bv(DstAcc | SrcImm | NoWrite, em_test),
I2bv(SrcAcc | DstDI | Mov | String, em_mov),
I2bv(SrcSI | DstAcc | Mov | String, em_mov),
- I2bv(SrcAcc | DstDI | String, em_cmp),
+ F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),
/* 0xB0 - 0xB7 */
X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
/* 0xB8 - 0xBF */
- X8(I(DstReg | SrcImm | Mov, em_mov)),
+ X8(I(DstReg | SrcImm64 | Mov, em_mov)),
/* 0xC0 - 0xC7 */
- D2bv(DstMem | SrcImmByte | ModRM),
+ G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
I(ImplicitOps | Stack, em_ret),
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
@@ -3800,10 +3924,11 @@ static const struct opcode opcode_table[256] = {
D(ImplicitOps), DI(SrcImmByte, intn),
D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
/* 0xD0 - 0xD7 */
- D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM),
- N, N, N, N,
+ G(Src2One | ByteOp, group2), G(Src2One, group2),
+ G(Src2CL | ByteOp, group2), G(Src2CL, group2),
+ N, I(DstAcc | SrcImmByte | No64, em_aad), N, N,
/* 0xD8 - 0xDF */
- N, N, N, N, N, N, N, N,
+ N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
/* 0xE0 - 0xE7 */
X3(I(SrcImmByte, em_loop)),
I(SrcImmByte, em_jcxz),
@@ -3870,28 +3995,29 @@ static const struct opcode twobyte_table[256] = {
X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
/* 0xA0 - 0xA7 */
I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
- II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt),
- D(DstMem | SrcReg | Src2ImmByte | ModRM),
- D(DstMem | SrcReg | Src2CL | ModRM), N, N,
+ II(ImplicitOps, em_cpuid, cpuid),
+ F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
+ F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
+ F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
/* 0xA8 - 0xAF */
I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
DI(ImplicitOps, rsm),
- I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
- D(DstMem | SrcReg | Src2ImmByte | ModRM),
- D(DstMem | SrcReg | Src2CL | ModRM),
- D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
+ F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
+ F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
+ F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
+ D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
/* 0xB0 - 0xB7 */
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
- I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
+ F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
/* 0xB8 - 0xBF */
N, N,
G(BitOp, group8),
- I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
- I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr),
+ F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
+ F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
/* 0xC0 - 0xC7 */
D2bv(DstMem | SrcReg | ModRM | Lock),
@@ -3950,6 +4076,9 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
case 4:
op->val = insn_fetch(s32, ctxt);
break;
+ case 8:
+ op->val = insn_fetch(s64, ctxt);
+ break;
}
if (!sign_extension) {
switch (op->bytes) {
@@ -4028,6 +4157,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
case OpImm:
rc = decode_imm(ctxt, op, imm_size(ctxt), true);
break;
+ case OpImm64:
+ rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
+ break;
case OpMem8:
ctxt->memop.bytes = 1;
goto mem_common;
@@ -4222,6 +4354,12 @@ done_prefixes:
case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
}
break;
+ case Escape:
+ if (ctxt->modrm > 0xbf)
+ opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
+ else
+ opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
+ break;
default:
return EMULATION_FAILED;
}
@@ -4354,6 +4492,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
}
+static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
+{
+ ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
+ fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
+ asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
+ : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags)
+ : "c"(ctxt->src2.val), [fastop]"S"(fop));
+ ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
+ return X86EMUL_CONTINUE;
+}
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
@@ -4483,6 +4631,13 @@ special_insn:
}
if (ctxt->execute) {
+ if (ctxt->d & Fastop) {
+ void (*fop)(struct fastop *) = (void *)ctxt->execute;
+ rc = fastop(ctxt, fop);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ goto writeback;
+ }
rc = ctxt->execute(ctxt);
if (rc != X86EMUL_CONTINUE)
goto done;
@@ -4493,12 +4648,6 @@ special_insn:
goto twobyte_insn;
switch (ctxt->b) {
- case 0x40 ... 0x47: /* inc r16/r32 */
- emulate_1op(ctxt, "inc");
- break;
- case 0x48 ... 0x4f: /* dec r16/r32 */
- emulate_1op(ctxt, "dec");
- break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
goto cannot_emulate;
@@ -4523,9 +4672,6 @@ special_insn:
case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
}
break;
- case 0xc0 ... 0xc1:
- rc = em_grp2(ctxt);
- break;
case 0xcc: /* int3 */
rc = emulate_int(ctxt, 3);
break;
@@ -4536,13 +4682,6 @@ special_insn:
if (ctxt->eflags & EFLG_OF)
rc = emulate_int(ctxt, 4);
break;
- case 0xd0 ... 0xd1: /* Grp2 */
- rc = em_grp2(ctxt);
- break;
- case 0xd2 ... 0xd3: /* Grp2 */
- ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX);
- rc = em_grp2(ctxt);
- break;
case 0xe9: /* jmp rel */
case 0xeb: /* jmp rel short */
jmp_rel(ctxt, ctxt->src.val);
@@ -4661,14 +4800,6 @@ twobyte_insn:
case 0x90 ... 0x9f: /* setcc r/m8 */
ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
break;
- case 0xa4: /* shld imm8, r, r/m */
- case 0xa5: /* shld cl, r, r/m */
- emulate_2op_cl(ctxt, "shld");
- break;
- case 0xac: /* shrd imm8, r, r/m */
- case 0xad: /* shrd cl, r, r/m */
- emulate_2op_cl(ctxt, "shrd");
- break;
case 0xae: /* clflush */
break;
case 0xb6 ... 0xb7: /* movzx */
@@ -4682,7 +4813,7 @@ twobyte_insn:
(s16) ctxt->src.val;
break;
case 0xc0 ... 0xc1: /* xadd */
- emulate_2op_SrcV(ctxt, "add");
+ fastop(ctxt, em_add);
/* Write back the register source. */
ctxt->src.val = ctxt->dst.orig_val;
write_register_operand(&ctxt->src);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 11300d2fa71..c1d30b2fc9b 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -122,7 +122,6 @@ static s64 __kpit_elapsed(struct kvm *kvm)
*/
remaining = hrtimer_get_remaining(&ps->timer);
elapsed = ps->period - ktime_to_ns(remaining);
- elapsed = mod_64(elapsed, ps->period);
return elapsed;
}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 848206df096..cc31f7c06d3 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -241,6 +241,8 @@ int kvm_pic_read_irq(struct kvm *kvm)
int irq, irq2, intno;
struct kvm_pic *s = pic_irqchip(kvm);
+ s->output = 0;
+
pic_lock(s);
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 7e06ba1618b..484bc874688 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,49 +38,81 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
/*
+ * check if there is pending interrupt from
+ * non-APIC source without intack.
+ */
+static int kvm_cpu_has_extint(struct kvm_vcpu *v)
+{
+ if (kvm_apic_accept_pic_intr(v))
+ return pic_irqchip(v->kvm)->output; /* PIC */
+ else
+ return 0;
+}
+
+/*
+ * check if there is injectable interrupt:
+ * when virtual interrupt delivery enabled,
+ * interrupt from apic will handled by hardware,
+ * we don't need to check it here.
+ */
+int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
+{
+ if (!irqchip_in_kernel(v->kvm))
+ return v->arch.interrupt.pending;
+
+ if (kvm_cpu_has_extint(v))
+ return 1;
+
+ if (kvm_apic_vid_enabled(v->kvm))
+ return 0;
+
+ return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
+}
+
+/*
* check if there is pending interrupt without
* intack.
*/
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
- struct kvm_pic *s;
-
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.pending;
- if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
- if (kvm_apic_accept_pic_intr(v)) {
- s = pic_irqchip(v->kvm); /* PIC */
- return s->output;
- } else
- return 0;
- }
- return 1;
+ if (kvm_cpu_has_extint(v))
+ return 1;
+
+ return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
}
EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
/*
+ * Read pending interrupt(from non-APIC source)
+ * vector and intack.
+ */
+static int kvm_cpu_get_extint(struct kvm_vcpu *v)
+{
+ if (kvm_cpu_has_extint(v))
+ return kvm_pic_read_irq(v->kvm); /* PIC */
+ return -1;
+}
+
+/*
* Read pending interrupt vector and intack.
*/
int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
{
- struct kvm_pic *s;
int vector;
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.nr;
- vector = kvm_get_apic_interrupt(v); /* APIC */
- if (vector == -1) {
- if (kvm_apic_accept_pic_intr(v)) {
- s = pic_irqchip(v->kvm);
- s->output = 0; /* PIC */
- vector = kvm_pic_read_irq(v->kvm);
- }
- }
- return vector;
+ vector = kvm_cpu_get_extint(v);
+
+ if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
+ return vector; /* PIC */
+
+ return kvm_get_apic_interrupt(v); /* APIC */
}
-EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9392f527f10..02b51dd4e4a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -140,31 +140,56 @@ static inline int apic_enabled(struct kvm_lapic *apic)
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
-static inline int apic_x2apic_mode(struct kvm_lapic *apic)
-{
- return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
-}
-
static inline int kvm_apic_id(struct kvm_lapic *apic)
{
return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}
-static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
+void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
+ struct kvm_lapic_irq *irq,
+ u64 *eoi_exit_bitmap)
{
- u16 cid;
- ldr >>= 32 - map->ldr_bits;
- cid = (ldr >> map->cid_shift) & map->cid_mask;
+ struct kvm_lapic **dst;
+ struct kvm_apic_map *map;
+ unsigned long bitmap = 1;
+ int i;
- BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
+ rcu_read_lock();
+ map = rcu_dereference(vcpu->kvm->arch.apic_map);
- return cid;
-}
+ if (unlikely(!map)) {
+ __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
+ goto out;
+ }
-static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
-{
- ldr >>= (32 - map->ldr_bits);
- return ldr & map->lid_mask;
+ if (irq->dest_mode == 0) { /* physical mode */
+ if (irq->delivery_mode == APIC_DM_LOWEST ||
+ irq->dest_id == 0xff) {
+ __set_bit(irq->vector,
+ (unsigned long *)eoi_exit_bitmap);
+ goto out;
+ }
+ dst = &map->phys_map[irq->dest_id & 0xff];
+ } else {
+ u32 mda = irq->dest_id << (32 - map->ldr_bits);
+
+ dst = map->logical_map[apic_cluster_id(map, mda)];
+
+ bitmap = apic_logical_id(map, mda);
+ }
+
+ for_each_set_bit(i, &bitmap, 16) {
+ if (!dst[i])
+ continue;
+ if (dst[i]->vcpu == vcpu) {
+ __set_bit(irq->vector,
+ (unsigned long *)eoi_exit_bitmap);
+ break;
+ }
+ }
+
+out:
+ rcu_read_unlock();
}
static void recalculate_apic_map(struct kvm *kvm)
@@ -230,6 +255,8 @@ out:
if (old)
kfree_rcu(old, rcu);
+
+ kvm_ioapic_make_eoibitmap_request(kvm);
}
static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@@ -345,6 +372,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
int result;
+ /*
+ * Note that irr_pending is just a hint. It will be always
+ * true with virtual interrupt delivery enabled.
+ */
if (!apic->irr_pending)
return -1;
@@ -461,6 +492,8 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
int result;
+
+ /* Note that isr_count is always 1 with vid enabled */
if (!apic->isr_count)
return -1;
if (likely(apic->highest_isr_cache != -1))
@@ -740,6 +773,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}
+static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
+{
+ if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
+ kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
+ int trigger_mode;
+ if (apic_test_vector(vector, apic->regs + APIC_TMR))
+ trigger_mode = IOAPIC_LEVEL_TRIG;
+ else
+ trigger_mode = IOAPIC_EDGE_TRIG;
+ kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+ }
+}
+
static int apic_set_eoi(struct kvm_lapic *apic)
{
int vector = apic_find_highest_isr(apic);
@@ -756,19 +802,26 @@ static int apic_set_eoi(struct kvm_lapic *apic)
apic_clear_isr(vector, apic);
apic_update_ppr(apic);
- if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
- kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
- int trigger_mode;
- if (apic_test_vector(vector, apic->regs + APIC_TMR))
- trigger_mode = IOAPIC_LEVEL_TRIG;
- else
- trigger_mode = IOAPIC_EDGE_TRIG;
- kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
- }
+ kvm_ioapic_send_eoi(apic, vector);
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
return vector;
}
+/*
+ * this interface assumes a trap-like exit, which has already finished
+ * desired side effect including vISR and vPPR update.
+ */
+void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ trace_kvm_eoi(apic, vector);
+
+ kvm_ioapic_send_eoi(apic, vector);
+ kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
+
static void apic_send_ipi(struct kvm_lapic *apic)
{
u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
@@ -1212,6 +1265,21 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
+/* emulate APIC access in a trap manner */
+void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
+{
+ u32 val = 0;
+
+ /* hw has done the conditional check and inst decode */
+ offset &= 0xff0;
+
+ apic_reg_read(vcpu->arch.apic, offset, 4, &val);
+
+ /* TODO: optimize to just emulate side effect w/o one more write */
+ apic_reg_write(vcpu->arch.apic, offset, val);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
+
void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
@@ -1288,6 +1356,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
+ u64 old_value = vcpu->arch.apic_base;
struct kvm_lapic *apic = vcpu->arch.apic;
if (!apic) {
@@ -1309,11 +1378,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
value &= ~MSR_IA32_APICBASE_BSP;
vcpu->arch.apic_base = value;
- if (apic_x2apic_mode(apic)) {
- u32 id = kvm_apic_id(apic);
- u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
- kvm_apic_set_ldr(apic, ldr);
+ if ((old_value ^ value) & X2APIC_ENABLE) {
+ if (value & X2APIC_ENABLE) {
+ u32 id = kvm_apic_id(apic);
+ u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
+ kvm_apic_set_ldr(apic, ldr);
+ kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
+ } else
+ kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
}
+
apic->base_address = apic->vcpu->arch.apic_base &
MSR_IA32_APICBASE_BASE;
@@ -1359,8 +1433,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
}
- apic->irr_pending = false;
- apic->isr_count = 0;
+ apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
+ apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
apic->highest_isr_cache = -1;
update_divide_count(apic);
atomic_set(&apic->lapic_timer.pending, 0);
@@ -1575,8 +1649,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
update_divide_count(apic);
start_apic_timer(apic);
apic->irr_pending = true;
- apic->isr_count = count_vectors(apic->regs + APIC_ISR);
+ apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
+ 1 : count_vectors(apic->regs + APIC_ISR);
apic->highest_isr_cache = -1;
+ kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index e5ebf9f3571..1676d34ddb4 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -64,6 +64,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
+void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
+void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
+
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
@@ -124,4 +127,35 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
}
+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+ return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
+}
+
+static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
+{
+ return kvm_x86_ops->vm_has_apicv(kvm);
+}
+
+static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
+{
+ u16 cid;
+ ldr >>= 32 - map->ldr_bits;
+ cid = (ldr >> map->cid_shift) & map->cid_mask;
+
+ BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
+
+ return cid;
+}
+
+static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
+{
+ ldr >>= (32 - map->ldr_bits);
+ return ldr & map->lid_mask;
+}
+
+void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
+ struct kvm_lapic_irq *irq,
+ u64 *eoi_bitmap);
+
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 01d7c2ad05f..4ed3edbe06b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -448,7 +448,8 @@ static bool __check_direct_spte_mmio_pf(u64 spte)
static bool spte_is_locklessly_modifiable(u64 spte)
{
- return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
+ return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
+ (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
}
static bool spte_has_volatile_bits(u64 spte)
@@ -831,8 +832,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
if (host_level == PT_PAGE_TABLE_LEVEL)
return host_level;
- max_level = kvm_x86_ops->get_lpage_level() < host_level ?
- kvm_x86_ops->get_lpage_level() : host_level;
+ max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
@@ -1142,7 +1142,7 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
}
static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
- int level, bool pt_protect)
+ bool pt_protect)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1180,7 +1180,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
while (mask) {
rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
PT_PAGE_TABLE_LEVEL, slot);
- __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false);
+ __rmap_write_protect(kvm, rmapp, false);
/* clear the first set bit */
mask &= mask - 1;
@@ -1199,7 +1199,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
for (i = PT_PAGE_TABLE_LEVEL;
i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
rmapp = __gfn_to_rmap(gfn, i, slot);
- write_protected |= __rmap_write_protect(kvm, rmapp, i, true);
+ write_protected |= __rmap_write_protect(kvm, rmapp, true);
}
return write_protected;
@@ -1460,28 +1460,14 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
}
-/*
- * Remove the sp from shadow page cache, after call it,
- * we can not find this sp from the cache, and the shadow
- * page table is still valid.
- * It should be under the protection of mmu lock.
- */
-static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp)
+static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
{
ASSERT(is_empty_shadow_page(sp->spt));
hlist_del(&sp->hash_link);
- if (!sp->role.direct)
- free_page((unsigned long)sp->gfns);
-}
-
-/*
- * Free the shadow page table and the sp, we can do it
- * out of the protection of mmu lock.
- */
-static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
-{
list_del(&sp->link);
free_page((unsigned long)sp->spt);
+ if (!sp->role.direct)
+ free_page((unsigned long)sp->gfns);
kmem_cache_free(mmu_page_header_cache, sp);
}
@@ -1522,7 +1508,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
- bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM);
sp->parent_ptes = 0;
mmu_page_add_parent_pte(vcpu, sp, parent_pte);
kvm_mod_used_mmu_pages(vcpu->kvm, +1);
@@ -1973,9 +1958,9 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
{
u64 spte;
- spte = __pa(sp->spt)
- | PT_PRESENT_MASK | PT_ACCESSED_MASK
- | PT_WRITABLE_MASK | PT_USER_MASK;
+ spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
+ shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
+
mmu_spte_set(sptep, spte);
}
@@ -2126,7 +2111,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
do {
sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
WARN_ON(!sp->role.invalid || sp->root_count);
- kvm_mmu_isolate_page(sp);
kvm_mmu_free_page(sp);
} while (!list_empty(invalid_list));
}
@@ -2144,6 +2128,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
* change the value
*/
+ spin_lock(&kvm->mmu_lock);
+
if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages &&
!list_empty(&kvm->arch.active_mmu_pages)) {
@@ -2158,6 +2144,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
}
kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
+
+ spin_unlock(&kvm->mmu_lock);
}
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
@@ -2183,14 +2171,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
-static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
-{
- int slot = memslot_id(kvm, gfn);
- struct kvm_mmu_page *sp = page_header(__pa(pte));
-
- __set_bit(slot, sp->slot_bitmap);
-}
-
/*
* The function is based on mtrr_type_lookup() in
* arch/x86/kernel/cpu/mtrr/generic.c
@@ -2332,9 +2312,8 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
if (s->role.level != PT_PAGE_TABLE_LEVEL)
return 1;
- if (!need_unsync && !s->unsync) {
+ if (!s->unsync)
need_unsync = true;
- }
}
if (need_unsync)
kvm_unsync_pages(vcpu, gfn);
@@ -2342,8 +2321,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
}
static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
- unsigned pte_access, int user_fault,
- int write_fault, int level,
+ unsigned pte_access, int level,
gfn_t gfn, pfn_t pfn, bool speculative,
bool can_unsync, bool host_writable)
{
@@ -2378,20 +2356,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= (u64)pfn << PAGE_SHIFT;
- if ((pte_access & ACC_WRITE_MASK)
- || (!vcpu->arch.mmu.direct_map && write_fault
- && !is_write_protection(vcpu) && !user_fault)) {
+ if (pte_access & ACC_WRITE_MASK) {
/*
- * There are two cases:
- * - the one is other vcpu creates new sp in the window
- * between mapping_level() and acquiring mmu-lock.
- * - the another case is the new sp is created by itself
- * (page-fault path) when guest uses the target gfn as
- * its page table.
- * Both of these cases can be fixed by allowing guest to
- * retry the access, it will refault, then we can establish
- * the mapping by using small page.
+ * Other vcpu creates new sp in the window between
+ * mapping_level() and acquiring mmu-lock. We can
+ * allow guest to retry the access, the mapping can
+ * be fixed if guest refault.
*/
if (level > PT_PAGE_TABLE_LEVEL &&
has_wrprotected_page(vcpu->kvm, gfn, level))
@@ -2399,19 +2370,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
- if (!vcpu->arch.mmu.direct_map
- && !(pte_access & ACC_WRITE_MASK)) {
- spte &= ~PT_USER_MASK;
- /*
- * If we converted a user page to a kernel page,
- * so that the kernel can write to it when cr0.wp=0,
- * then we should prevent the kernel from executing it
- * if SMEP is enabled.
- */
- if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
- spte |= PT64_NX_MASK;
- }
-
/*
* Optimization: for pte sync, if spte was writable the hash
* lookup is unnecessary (and expensive). Write protection
@@ -2441,19 +2399,15 @@ done:
}
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
- unsigned pt_access, unsigned pte_access,
- int user_fault, int write_fault,
- int *emulate, int level, gfn_t gfn,
- pfn_t pfn, bool speculative,
+ unsigned pte_access, int write_fault, int *emulate,
+ int level, gfn_t gfn, pfn_t pfn, bool speculative,
bool host_writable)
{
int was_rmapped = 0;
int rmap_count;
- pgprintk("%s: spte %llx access %x write_fault %d"
- " user_fault %d gfn %llx\n",
- __func__, *sptep, pt_access,
- write_fault, user_fault, gfn);
+ pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
+ *sptep, write_fault, gfn);
if (is_rmap_spte(*sptep)) {
/*
@@ -2477,9 +2431,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
was_rmapped = 1;
}
- if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
- level, gfn, pfn, speculative, true,
- host_writable)) {
+ if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
+ true, host_writable)) {
if (write_fault)
*emulate = 1;
kvm_mmu_flush_tlb(vcpu);
@@ -2497,7 +2450,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
++vcpu->kvm->stat.lpages;
if (is_shadow_present_pte(*sptep)) {
- page_header_update_slot(vcpu->kvm, sptep, gfn);
if (!was_rmapped) {
rmap_count = rmap_add(vcpu, sptep, gfn);
if (rmap_count > RMAP_RECYCLE_THRESHOLD)
@@ -2571,10 +2523,9 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
return -1;
for (i = 0; i < ret; i++, gfn++, start++)
- mmu_set_spte(vcpu, start, ACC_ALL,
- access, 0, 0, NULL,
- sp->role.level, gfn,
- page_to_pfn(pages[i]), true, true);
+ mmu_set_spte(vcpu, start, access, 0, NULL,
+ sp->role.level, gfn, page_to_pfn(pages[i]),
+ true, true);
return 0;
}
@@ -2633,11 +2584,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == level) {
- unsigned pte_access = ACC_ALL;
-
- mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
- 0, write, &emulate,
- level, gfn, pfn, prefault, map_writable);
+ mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
+ write, &emulate, level, gfn, pfn,
+ prefault, map_writable);
direct_pte_prefetch(vcpu, iterator.sptep);
++vcpu->stat.pf_fixed;
break;
@@ -2652,11 +2601,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
iterator.level - 1,
1, ACC_ALL, iterator.sptep);
- mmu_spte_set(iterator.sptep,
- __pa(sp->spt)
- | PT_PRESENT_MASK | PT_WRITABLE_MASK
- | shadow_user_mask | shadow_x_mask
- | shadow_accessed_mask);
+ link_shadow_page(iterator.sptep, sp);
}
}
return emulate;
@@ -3719,6 +3664,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
else
r = paging32_init_context(vcpu, context);
+ vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
vcpu->arch.mmu.base_role.smep_andnot_wp
@@ -3885,7 +3831,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
*gpa &= ~(gpa_t)7;
*bytes = 8;
- r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
+ r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8);
if (r)
gentry = 0;
new = (const u8 *)&gentry;
@@ -4039,7 +3985,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
& mask.word) && rmap_can_add(vcpu))
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
- if (!remote_flush && need_remote_flush(entry, *spte))
+ if (need_remote_flush(entry, *spte))
remote_flush = true;
++spte;
}
@@ -4198,26 +4144,36 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
{
- struct kvm_mmu_page *sp;
- bool flush = false;
+ struct kvm_memory_slot *memslot;
+ gfn_t last_gfn;
+ int i;
- list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
- int i;
- u64 *pt;
+ memslot = id_to_memslot(kvm->memslots, slot);
+ last_gfn = memslot->base_gfn + memslot->npages - 1;
- if (!test_bit(slot, sp->slot_bitmap))
- continue;
+ spin_lock(&kvm->mmu_lock);
- pt = sp->spt;
- for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
- if (!is_shadow_present_pte(pt[i]) ||
- !is_last_spte(pt[i], sp->role.level))
- continue;
+ for (i = PT_PAGE_TABLE_LEVEL;
+ i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+ unsigned long *rmapp;
+ unsigned long last_index, index;
- spte_write_protect(kvm, &pt[i], &flush, false);
+ rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
+ last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
+
+ for (index = 0; index <= last_index; ++index, ++rmapp) {
+ if (*rmapp)
+ __rmap_write_protect(kvm, rmapp, false);
+
+ if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ kvm_flush_remote_tlbs(kvm);
+ cond_resched_lock(&kvm->mmu_lock);
+ }
}
}
+
kvm_flush_remote_tlbs(kvm);
+ spin_unlock(&kvm->mmu_lock);
}
void kvm_mmu_zap_all(struct kvm *kvm)
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index cd6e98333ba..b8f6172f417 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -195,12 +195,6 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
TP_ARGS(sp)
);
-DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_delay_free_pages,
- TP_PROTO(struct kvm_mmu_page *sp),
-
- TP_ARGS(sp)
-);
-
TRACE_EVENT(
mark_mmio_spte,
TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access),
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 891eb6d93b8..105dd5bd550 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
pt_element_t pte;
pt_element_t __user *uninitialized_var(ptep_user);
gfn_t table_gfn;
- unsigned index, pt_access, pte_access, accessed_dirty, shift;
+ unsigned index, pt_access, pte_access, accessed_dirty;
gpa_t pte_gpa;
int offset;
const int write_fault = access & PFERR_WRITE_MASK;
@@ -249,16 +249,12 @@ retry_walk:
if (!write_fault)
protect_clean_gpte(&pte_access, pte);
-
- /*
- * On a write fault, fold the dirty bit into accessed_dirty by shifting it one
- * place right.
- *
- * On a read fault, do nothing.
- */
- shift = write_fault >> ilog2(PFERR_WRITE_MASK);
- shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
- accessed_dirty &= pte >> shift;
+ else
+ /*
+ * On a write fault, fold the dirty bit into accessed_dirty by
+ * shifting it one place right.
+ */
+ accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT);
if (unlikely(!accessed_dirty)) {
ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
@@ -330,8 +326,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* we call mmu_set_spte() with host_writable = true because
* pte_prefetch_gfn_to_pfn always gets a writable pfn.
*/
- mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
- NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true);
+ mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL,
+ gfn, pfn, true, true);
return true;
}
@@ -405,7 +401,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
*/
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw,
- int user_fault, int write_fault, int hlevel,
+ int write_fault, int hlevel,
pfn_t pfn, bool map_writable, bool prefault)
{
struct kvm_mmu_page *sp = NULL;
@@ -413,9 +409,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
unsigned direct_access, access = gw->pt_access;
int top_level, emulate = 0;
- if (!is_present_gpte(gw->ptes[gw->level - 1]))
- return 0;
-
direct_access = gw->pte_access;
top_level = vcpu->arch.mmu.root_level;
@@ -477,9 +470,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
}
clear_sp_write_flooding_count(it.sptep);
- mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
- user_fault, write_fault, &emulate, it.level,
- gw->gfn, pfn, prefault, map_writable);
+ mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate,
+ it.level, gw->gfn, pfn, prefault, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
return emulate;
@@ -491,6 +483,46 @@ out_gpte_changed:
return 0;
}
+ /*
+ * To see whether the mapped gfn can write its page table in the current
+ * mapping.
+ *
+ * It is the helper function of FNAME(page_fault). When guest uses large page
+ * size to map the writable gfn which is used as current page table, we should
+ * force kvm to use small page size to map it because new shadow page will be
+ * created when kvm establishes shadow page table that stop kvm using large
+ * page size. Do it early can avoid unnecessary #PF and emulation.
+ *
+ * @write_fault_to_shadow_pgtable will return true if the fault gfn is
+ * currently used as its page table.
+ *
+ * Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok
+ * since the PDPT is always shadowed, that means, we can not use large page
+ * size to map the gfn which is used as PDPT.
+ */
+static bool
+FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
+ struct guest_walker *walker, int user_fault,
+ bool *write_fault_to_shadow_pgtable)
+{
+ int level;
+ gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
+ bool self_changed = false;
+
+ if (!(walker->pte_access & ACC_WRITE_MASK ||
+ (!is_write_protection(vcpu) && !user_fault)))
+ return false;
+
+ for (level = walker->level; level <= walker->max_level; level++) {
+ gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];
+
+ self_changed |= !(gfn & mask);
+ *write_fault_to_shadow_pgtable |= !gfn;
+ }
+
+ return self_changed;
+}
+
/*
* Page fault handler. There are several causes for a page fault:
* - there is no shadow pte for the guest pte
@@ -516,7 +548,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
int level = PT_PAGE_TABLE_LEVEL;
int force_pt_level;
unsigned long mmu_seq;
- bool map_writable;
+ bool map_writable, is_self_change_mapping;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@@ -544,8 +576,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
return 0;
}
+ vcpu->arch.write_fault_to_shadow_pgtable = false;
+
+ is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
+ &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);
+
if (walker.level >= PT_DIRECTORY_LEVEL)
- force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn);
+ force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn)
+ || is_self_change_mapping;
else
force_pt_level = 1;
if (!force_pt_level) {
@@ -564,6 +602,26 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
walker.gfn, pfn, walker.pte_access, &r))
return r;
+ /*
+ * Do not change pte_access if the pfn is a mmio page, otherwise
+ * we will cache the incorrect access into mmio spte.
+ */
+ if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) &&
+ !is_write_protection(vcpu) && !user_fault &&
+ !is_noslot_pfn(pfn)) {
+ walker.pte_access |= ACC_WRITE_MASK;
+ walker.pte_access &= ~ACC_USER_MASK;
+
+ /*
+ * If we converted a user page to a kernel page,
+ * so that the kernel can write to it when cr0.wp=0,
+ * then we should prevent the kernel from executing it
+ * if SMEP is enabled.
+ */
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
+ walker.pte_access &= ~ACC_EXEC_MASK;
+ }
+
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
@@ -572,7 +630,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
kvm_mmu_free_some_pages(vcpu);
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
- r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
+ r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
level, pfn, map_writable, prefault);
++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
@@ -747,7 +805,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
- set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
+ set_spte(vcpu, &sp->spt[i], pte_access,
PT_PAGE_TABLE_LEVEL, gfn,
spte_to_pfn(sp->spt[i]), true, false,
host_writable);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d29d3cd1c15..e1b1ce21bc0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3571,6 +3571,26 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
}
+static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+{
+ return;
+}
+
+static int svm_vm_has_apicv(struct kvm *kvm)
+{
+ return 0;
+}
+
+static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+ return;
+}
+
+static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
+{
+ return;
+}
+
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4290,6 +4310,10 @@ static struct kvm_x86_ops svm_x86_ops = {
.enable_nmi_window = enable_nmi_window,
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
+ .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
+ .vm_has_apicv = svm_vm_has_apicv,
+ .load_eoi_exitmap = svm_load_eoi_exitmap,
+ .hwapic_isr_update = svm_hwapic_isr_update,
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9120ae1901e..6667042714c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,6 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
+static bool __read_mostly enable_apicv_reg_vid;
+
/*
* If nested=1, nested virtualization is supported, i.e., guests may use
* VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -92,12 +94,8 @@ module_param(fasteoi, bool, S_IRUGO);
static bool __read_mostly nested = 0;
module_param(nested, bool, S_IRUGO);
-#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
- (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
-#define KVM_GUEST_CR0_MASK \
- (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
- (X86_CR0_WP | X86_CR0_NE)
+#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON \
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS \
@@ -624,6 +622,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
static void vmx_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
+static bool guest_state_valid(struct kvm_vcpu *vcpu);
+static u32 vmx_segment_access_rights(struct kvm_segment *var);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -638,6 +638,8 @@ static unsigned long *vmx_io_bitmap_a;
static unsigned long *vmx_io_bitmap_b;
static unsigned long *vmx_msr_bitmap_legacy;
static unsigned long *vmx_msr_bitmap_longmode;
+static unsigned long *vmx_msr_bitmap_legacy_x2apic;
+static unsigned long *vmx_msr_bitmap_longmode_x2apic;
static bool cpu_has_load_ia32_efer;
static bool cpu_has_load_perf_global_ctrl;
@@ -762,6 +764,24 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
}
+static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+}
+
+static inline bool cpu_has_vmx_apic_register_virt(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_APIC_REGISTER_VIRT;
+}
+
+static inline bool cpu_has_vmx_virtual_intr_delivery(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+}
+
static inline bool cpu_has_vmx_flexpriority(void)
{
return cpu_has_vmx_tpr_shadow() &&
@@ -1694,7 +1714,6 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
- __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
to_vmx(vcpu)->rflags = rflags;
if (to_vmx(vcpu)->rmode.vm86_active) {
to_vmx(vcpu)->rmode.save_rflags = rflags;
@@ -1820,6 +1839,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
vmx->guest_msrs[from] = tmp;
}
+static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
+{
+ unsigned long *msr_bitmap;
+
+ if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
+ if (is_long_mode(vcpu))
+ msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+ else
+ msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+ } else {
+ if (is_long_mode(vcpu))
+ msr_bitmap = vmx_msr_bitmap_longmode;
+ else
+ msr_bitmap = vmx_msr_bitmap_legacy;
+ }
+
+ vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
+}
+
/*
* Set up the vmcs to automatically save and restore system
* msrs. Don't touch the 64-bit msrs if the guest is in legacy
@@ -1828,7 +1866,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
static void setup_msrs(struct vcpu_vmx *vmx)
{
int save_nmsrs, index;
- unsigned long *msr_bitmap;
save_nmsrs = 0;
#ifdef CONFIG_X86_64
@@ -1860,14 +1897,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
vmx->save_nmsrs = save_nmsrs;
- if (cpu_has_vmx_msr_bitmap()) {
- if (is_long_mode(&vmx->vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode;
- else
- msr_bitmap = vmx_msr_bitmap_legacy;
-
- vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
- }
+ if (cpu_has_vmx_msr_bitmap())
+ vmx_set_msr_bitmap(&vmx->vcpu);
}
/*
@@ -2533,13 +2564,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
min2 = 0;
opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_WBINVD_EXITING |
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_ENABLE_EPT |
SECONDARY_EXEC_UNRESTRICTED_GUEST |
SECONDARY_EXEC_PAUSE_LOOP_EXITING |
SECONDARY_EXEC_RDTSCP |
- SECONDARY_EXEC_ENABLE_INVPCID;
+ SECONDARY_EXEC_ENABLE_INVPCID |
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -2550,6 +2584,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
#endif
+
+ if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
+ _cpu_based_2nd_exec_control &= ~(
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+
if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
enabled */
@@ -2747,6 +2788,15 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_ple())
ple_gap = 0;
+ if (!cpu_has_vmx_apic_register_virt() ||
+ !cpu_has_vmx_virtual_intr_delivery())
+ enable_apicv_reg_vid = 0;
+
+ if (enable_apicv_reg_vid)
+ kvm_x86_ops->update_cr8_intercept = NULL;
+ else
+ kvm_x86_ops->hwapic_irr_update = NULL;
+
if (nested)
nested_vmx_setup_ctls_msrs();
@@ -2758,18 +2808,28 @@ static __exit void hardware_unsetup(void)
free_kvm_area();
}
-static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
+static bool emulation_required(struct kvm_vcpu *vcpu)
{
- const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
- struct kvm_segment tmp = *save;
+ return emulate_invalid_guest_state && !guest_state_valid(vcpu);
+}
- if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
- tmp.base = vmcs_readl(sf->base);
- tmp.selector = vmcs_read16(sf->selector);
- tmp.dpl = tmp.selector & SELECTOR_RPL_MASK;
- tmp.s = 1;
+static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
+ struct kvm_segment *save)
+{
+ if (!emulate_invalid_guest_state) {
+ /*
+ * CS and SS RPL should be equal during guest entry according
+ * to VMX spec, but in reality it is not always so. Since vcpu
+ * is in the middle of the transition from real mode to
+ * protected mode it is safe to assume that RPL 0 is a good
+ * default value.
+ */
+ if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
+ save->selector &= ~SELECTOR_RPL_MASK;
+ save->dpl = save->selector & SELECTOR_RPL_MASK;
+ save->s = 1;
}
- vmx_set_segment(vcpu, &tmp, seg);
+ vmx_set_segment(vcpu, save, seg);
}
static void enter_pmode(struct kvm_vcpu *vcpu)
@@ -2777,7 +2837,17 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
unsigned long flags;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmx->emulation_required = 1;
+ /*
+ * Update real mode segment cache. It may be not up-to-date if sement
+ * register was written while vcpu was in a guest mode.
+ */
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
+
vmx->rmode.vm86_active = 0;
vmx_segment_cache_clear(vmx);
@@ -2794,22 +2864,16 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
update_exception_bitmap(vcpu);
- if (emulate_invalid_guest_state)
- return;
-
- fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
- fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
- fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
- fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
-
- vmx_segment_cache_clear(vmx);
+ fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
+ fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
+ fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
+ fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
+ fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
+ fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
- vmcs_write16(GUEST_SS_SELECTOR, 0);
- vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
-
- vmcs_write16(GUEST_CS_SELECTOR,
- vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
- vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+ /* CPL is always 0 when CPU enters protected mode */
+ __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+ vmx->cpl = 0;
}
static gva_t rmode_tss_base(struct kvm *kvm)
@@ -2831,36 +2895,51 @@ static gva_t rmode_tss_base(struct kvm *kvm)
static void fix_rmode_seg(int seg, struct kvm_segment *save)
{
const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-
- vmcs_write16(sf->selector, save->base >> 4);
- vmcs_write32(sf->base, save->base & 0xffff0);
- vmcs_write32(sf->limit, 0xffff);
- vmcs_write32(sf->ar_bytes, 0xf3);
- if (save->base & 0xf)
- printk_once(KERN_WARNING "kvm: segment base is not paragraph"
- " aligned when entering protected mode (seg=%d)",
- seg);
+ struct kvm_segment var = *save;
+
+ var.dpl = 0x3;
+ if (seg == VCPU_SREG_CS)
+ var.type = 0x3;
+
+ if (!emulate_invalid_guest_state) {
+ var.selector = var.base >> 4;
+ var.base = var.base & 0xffff0;
+ var.limit = 0xffff;
+ var.g = 0;
+ var.db = 0;
+ var.present = 1;
+ var.s = 1;
+ var.l = 0;
+ var.unusable = 0;
+ var.type = 0x3;
+ var.avl = 0;
+ if (save->base & 0xf)
+ printk_once(KERN_WARNING "kvm: segment base is not "
+ "paragraph aligned when entering "
+ "protected mode (seg=%d)", seg);
+ }
+
+ vmcs_write16(sf->selector, var.selector);
+ vmcs_write32(sf->base, var.base);
+ vmcs_write32(sf->limit, var.limit);
+ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
}
static void enter_rmode(struct kvm_vcpu *vcpu)
{
unsigned long flags;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_segment var;
-
- if (enable_unrestricted_guest)
- return;
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
- vmx->emulation_required = 1;
vmx->rmode.vm86_active = 1;
-
/*
* Very old userspace does not call KVM_SET_TSS_ADDR before entering
* vcpu. Call it here with phys address pointing 16M below 4G.
@@ -2888,28 +2967,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
update_exception_bitmap(vcpu);
- if (emulate_invalid_guest_state)
- goto continue_rmode;
-
- vmx_get_segment(vcpu, &var, VCPU_SREG_SS);
- vmx_set_segment(vcpu, &var, VCPU_SREG_SS);
-
- vmx_get_segment(vcpu, &var, VCPU_SREG_CS);
- vmx_set_segment(vcpu, &var, VCPU_SREG_CS);
-
- vmx_get_segment(vcpu, &var, VCPU_SREG_ES);
- vmx_set_segment(vcpu, &var, VCPU_SREG_ES);
-
- vmx_get_segment(vcpu, &var, VCPU_SREG_DS);
- vmx_set_segment(vcpu, &var, VCPU_SREG_DS);
+ fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
+ fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
+ fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
+ fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
+ fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
+ fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
- vmx_get_segment(vcpu, &var, VCPU_SREG_GS);
- vmx_set_segment(vcpu, &var, VCPU_SREG_GS);
-
- vmx_get_segment(vcpu, &var, VCPU_SREG_FS);
- vmx_set_segment(vcpu, &var, VCPU_SREG_FS);
-
-continue_rmode:
kvm_mmu_reset_context(vcpu);
}
@@ -3068,17 +3132,18 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long hw_cr0;
+ hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK);
if (enable_unrestricted_guest)
- hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST)
- | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
- else
- hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
+ hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
+ else {
+ hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
- if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
- enter_pmode(vcpu);
+ if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
+ enter_pmode(vcpu);
- if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
- enter_rmode(vcpu);
+ if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
+ enter_rmode(vcpu);
+ }
#ifdef CONFIG_X86_64
if (vcpu->arch.efer & EFER_LME) {
@@ -3098,7 +3163,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vmcs_writel(CR0_READ_SHADOW, cr0);
vmcs_writel(GUEST_CR0, hw_cr0);
vcpu->arch.cr0 = cr0;
- __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+
+ /* depends on vcpu->arch.cr0 to be set to a new value */
+ vmx->emulation_required = emulation_required(vcpu);
}
static u64 construct_eptp(unsigned long root_hpa)
@@ -3155,6 +3222,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!is_paging(vcpu)) {
hw_cr4 &= ~X86_CR4_PAE;
hw_cr4 |= X86_CR4_PSE;
+ /*
+ * SMEP is disabled if CPU is in non-paging mode in
+ * hardware. However KVM always uses paging mode to
+ * emulate guest non-paging mode with TDP.
+ * To emulate this behavior, SMEP needs to be manually
+ * disabled when guest switches to non-paging mode.
+ */
+ hw_cr4 &= ~X86_CR4_SMEP;
} else if (!(cr4 & X86_CR4_PAE)) {
hw_cr4 &= ~X86_CR4_PAE;
}
@@ -3171,10 +3246,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 ar;
- if (vmx->rmode.vm86_active
- && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
- || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
- || seg == VCPU_SREG_GS)) {
+ if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
*var = vmx->rmode.segs[seg];
if (seg == VCPU_SREG_TR
|| var->selector == vmx_read_guest_seg_selector(vmx, seg))
@@ -3187,8 +3259,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
var->limit = vmx_read_guest_seg_limit(vmx, seg);
var->selector = vmx_read_guest_seg_selector(vmx, seg);
ar = vmx_read_guest_seg_ar(vmx, seg);
- if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
- ar = 0;
var->type = ar & 15;
var->s = (ar >> 4) & 1;
var->dpl = (ar >> 5) & 3;
@@ -3211,8 +3281,10 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
}
-static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
if (!is_protmode(vcpu))
return 0;
@@ -3220,24 +3292,9 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
&& (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
return 3;
- return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
-}
-
-static int vmx_get_cpl(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- /*
- * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations
- * fail; use the cache instead.
- */
- if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) {
- return vmx->cpl;
- }
-
if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
__set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
- vmx->cpl = __vmx_get_cpl(vcpu);
+ vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3;
}
return vmx->cpl;
@@ -3269,28 +3326,23 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
- u32 ar;
vmx_segment_cache_clear(vmx);
+ if (seg == VCPU_SREG_CS)
+ __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
- if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
- vmcs_write16(sf->selector, var->selector);
- vmx->rmode.segs[VCPU_SREG_TR] = *var;
- return;
+ if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
+ vmx->rmode.segs[seg] = *var;
+ if (seg == VCPU_SREG_TR)
+ vmcs_write16(sf->selector, var->selector);
+ else if (var->s)
+ fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
+ goto out;
}
+
vmcs_writel(sf->base, var->base);
vmcs_write32(sf->limit, var->limit);
vmcs_write16(sf->selector, var->selector);
- if (vmx->rmode.vm86_active && var->s) {
- vmx->rmode.segs[seg] = *var;
- /*
- * Hack real-mode segments into vm86 compatibility.
- */
- if (var->base == 0xffff0000 && var->selector == 0xf000)
- vmcs_writel(sf->base, 0xf0000);
- ar = 0xf3;
- } else
- ar = vmx_segment_access_rights(var);
/*
* Fix the "Accessed" bit in AR field of segment registers for older
@@ -3304,42 +3356,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
* kvm hack.
*/
if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
- ar |= 0x1; /* Accessed */
+ var->type |= 0x1; /* Accessed */
- vmcs_write32(sf->ar_bytes, ar);
- __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
- /*
- * Fix segments for real mode guest in hosts that don't have
- * "unrestricted_mode" or it was disabled.
- * This is done to allow migration of the guests from hosts with
- * unrestricted guest like Westmere to older host that don't have
- * unrestricted guest like Nehelem.
- */
- if (vmx->rmode.vm86_active) {
- switch (seg) {
- case VCPU_SREG_CS:
- vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
- vmcs_write32(GUEST_CS_LIMIT, 0xffff);
- if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
- vmcs_writel(GUEST_CS_BASE, 0xf0000);
- vmcs_write16(GUEST_CS_SELECTOR,
- vmcs_readl(GUEST_CS_BASE) >> 4);
- break;
- case VCPU_SREG_ES:
- case VCPU_SREG_DS:
- case VCPU_SREG_GS:
- case VCPU_SREG_FS:
- fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
- break;
- case VCPU_SREG_SS:
- vmcs_write16(GUEST_SS_SELECTOR,
- vmcs_readl(GUEST_SS_BASE) >> 4);
- vmcs_write32(GUEST_SS_LIMIT, 0xffff);
- vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
- break;
- }
- }
+out:
+ vmx->emulation_required |= emulation_required(vcpu);
}
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -3380,13 +3402,16 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
u32 ar;
vmx_get_segment(vcpu, &var, seg);
+ var.dpl = 0x3;
+ if (seg == VCPU_SREG_CS)
+ var.type = 0x3;
ar = vmx_segment_access_rights(&var);
if (var.base != (var.selector << 4))
return false;
- if (var.limit < 0xffff)
+ if (var.limit != 0xffff)
return false;
- if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3)
+ if (ar != 0xf3)
return false;
return true;
@@ -3521,6 +3546,9 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
*/
static bool guest_state_valid(struct kvm_vcpu *vcpu)
{
+ if (enable_unrestricted_guest)
+ return true;
+
/* real mode guest state checks */
if (!is_protmode(vcpu)) {
if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
@@ -3644,12 +3672,9 @@ static void seg_setup(int seg)
vmcs_write16(sf->selector, 0);
vmcs_writel(sf->base, 0);
vmcs_write32(sf->limit, 0xffff);
- if (enable_unrestricted_guest) {
- ar = 0x93;
- if (seg == VCPU_SREG_CS)
- ar |= 0x08; /* code segment */
- } else
- ar = 0xf3;
+ ar = 0x93;
+ if (seg == VCPU_SREG_CS)
+ ar |= 0x08; /* code segment */
vmcs_write32(sf->ar_bytes, ar);
}
@@ -3667,7 +3692,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
kvm_userspace_mem.flags = 0;
kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
+ r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
if (r)
goto out;
@@ -3697,7 +3722,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
kvm_userspace_mem.guest_phys_addr =
kvm->arch.ept_identity_map_addr;
kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
+ r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
if (r)
goto out;
@@ -3739,7 +3764,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
spin_unlock(&vmx_vpid_lock);
}
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
+#define MSR_TYPE_R 1
+#define MSR_TYPE_W 2
+static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type)
{
int f = sizeof(unsigned long);
@@ -3752,20 +3780,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
* We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
*/
if (msr <= 0x1fff) {
- __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
- __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
+ if (type & MSR_TYPE_R)
+ /* read-low */
+ __clear_bit(msr, msr_bitmap + 0x000 / f);
+
+ if (type & MSR_TYPE_W)
+ /* write-low */
+ __clear_bit(msr, msr_bitmap + 0x800 / f);
+
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
msr &= 0x1fff;
- __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
- __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
+ if (type & MSR_TYPE_R)
+ /* read-high */
+ __clear_bit(msr, msr_bitmap + 0x400 / f);
+
+ if (type & MSR_TYPE_W)
+ /* write-high */
+ __clear_bit(msr, msr_bitmap + 0xc00 / f);
+
+ }
+}
+
+static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type)
+{
+ int f = sizeof(unsigned long);
+
+ if (!cpu_has_vmx_msr_bitmap())
+ return;
+
+ /*
+ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+ * have the write-low and read-high bitmap offsets the wrong way round.
+ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+ */
+ if (msr <= 0x1fff) {
+ if (type & MSR_TYPE_R)
+ /* read-low */
+ __set_bit(msr, msr_bitmap + 0x000 / f);
+
+ if (type & MSR_TYPE_W)
+ /* write-low */
+ __set_bit(msr, msr_bitmap + 0x800 / f);
+
+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+ msr &= 0x1fff;
+ if (type & MSR_TYPE_R)
+ /* read-high */
+ __set_bit(msr, msr_bitmap + 0x400 / f);
+
+ if (type & MSR_TYPE_W)
+ /* write-high */
+ __set_bit(msr, msr_bitmap + 0xc00 / f);
+
}
}
static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
{
if (!longmode_only)
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
+ msr, MSR_TYPE_R | MSR_TYPE_W);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
+ msr, MSR_TYPE_R | MSR_TYPE_W);
+}
+
+static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
+{
+ __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+ msr, MSR_TYPE_R);
+ __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+ msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
+{
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+ msr, MSR_TYPE_R);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+ msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
+{
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+ msr, MSR_TYPE_W);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+ msr, MSR_TYPE_W);
}
/*
@@ -3844,6 +3945,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
return exec_control;
}
+static int vmx_vm_has_apicv(struct kvm *kvm)
+{
+ return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
+}
+
static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
{
u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -3861,6 +3967,10 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
if (!ple_gap)
exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+ if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+ exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+ exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
return exec_control;
}
@@ -3905,6 +4015,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmx_secondary_exec_control(vmx));
}
+ if (enable_apicv_reg_vid) {
+ vmcs_write64(EOI_EXIT_BITMAP0, 0);
+ vmcs_write64(EOI_EXIT_BITMAP1, 0);
+ vmcs_write64(EOI_EXIT_BITMAP2, 0);
+ vmcs_write64(EOI_EXIT_BITMAP3, 0);
+
+ vmcs_write16(GUEST_INTR_STATUS, 0);
+ }
+
if (ple_gap) {
vmcs_write32(PLE_GAP, ple_gap);
vmcs_write32(PLE_WINDOW, ple_window);
@@ -3990,14 +4109,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx_segment_cache_clear(vmx);
seg_setup(VCPU_SREG_CS);
- /*
- * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
- * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
- */
- if (kvm_vcpu_is_bsp(&vmx->vcpu)) {
+ if (kvm_vcpu_is_bsp(&vmx->vcpu))
vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
- vmcs_writel(GUEST_CS_BASE, 0x000f0000);
- } else {
+ else {
vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
}
@@ -4073,9 +4187,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
ret = 0;
- /* HACK: Don't enable emulation on guest boot/reset */
- vmx->emulation_required = 0;
-
return ret;
}
@@ -4251,7 +4362,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
.flags = 0,
};
- ret = kvm_set_memory_region(kvm, &tss_mem, 0);
+ ret = kvm_set_memory_region(kvm, &tss_mem, false);
if (ret)
return ret;
kvm->arch.tss_addr = addr;
@@ -4261,28 +4372,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
-static int handle_rmode_exception(struct kvm_vcpu *vcpu,
- int vec, u32 err_code)
+static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
{
- /*
- * Instruction with address size override prefix opcode 0x67
- * Cause the #SS fault with 0 error code in VM86 mode.
- */
- if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
- if (emulate_instruction(vcpu, 0) == EMULATE_DONE)
- return 1;
- /*
- * Forward all other exceptions that are valid in real mode.
- * FIXME: Breaks guest debugging in real mode, needs to be fixed with
- * the required debugging infrastructure rework.
- */
switch (vec) {
- case DB_VECTOR:
- if (vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- return 0;
- kvm_queue_exception(vcpu, vec);
- return 1;
case BP_VECTOR:
/*
* Update instruction length as we may reinject the exception
@@ -4291,7 +4383,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
- return 0;
+ return false;
+ /* fall through */
+ case DB_VECTOR:
+ if (vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ return false;
/* fall through */
case DE_VECTOR:
case OF_VECTOR:
@@ -4301,10 +4398,37 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
case SS_VECTOR:
case GP_VECTOR:
case MF_VECTOR:
- kvm_queue_exception(vcpu, vec);
- return 1;
+ return true;
+ break;
}
- return 0;
+ return false;
+}
+
+static int handle_rmode_exception(struct kvm_vcpu *vcpu,
+ int vec, u32 err_code)
+{
+ /*
+ * Instruction with address size override prefix opcode 0x67
+ * Cause the #SS fault with 0 error code in VM86 mode.
+ */
+ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
+ if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+ if (vcpu->arch.halt_request) {
+ vcpu->arch.halt_request = 0;
+ return kvm_emulate_halt(vcpu);
+ }
+ return 1;
+ }
+ return 0;
+ }
+
+ /*
+ * Forward all other exceptions that are valid in real mode.
+ * FIXME: Breaks guest debugging in real mode, needs to be fixed with
+ * the required debugging infrastructure rework.
+ */
+ kvm_queue_exception(vcpu, vec);
+ return 1;
}
/*
@@ -4392,17 +4516,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
}
- if (vmx->rmode.vm86_active &&
- handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
- error_code)) {
- if (vcpu->arch.halt_request) {
- vcpu->arch.halt_request = 0;
- return kvm_emulate_halt(vcpu);
- }
- return 1;
- }
-
ex_no = intr_info & INTR_INFO_VECTOR_MASK;
+
+ if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
+ return handle_rmode_exception(vcpu, ex_no, error_code);
+
switch (ex_no) {
case DB_VECTOR:
dr6 = vmcs_readl(EXIT_QUALIFICATION);
@@ -4820,6 +4938,26 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
+static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
+{
+ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ int vector = exit_qualification & 0xff;
+
+ /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
+ kvm_apic_set_eoi_accelerated(vcpu, vector);
+ return 1;
+}
+
+static int handle_apic_write(struct kvm_vcpu *vcpu)
+{
+ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ u32 offset = exit_qualification & 0xfff;
+
+ /* APIC-write VM exit is trap-like and thus no need to adjust IP */
+ kvm_apic_write_nodecode(vcpu, offset);
+ return 1;
+}
+
static int handle_task_switch(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5065,7 +5203,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
schedule();
}
- vmx->emulation_required = !guest_state_valid(vcpu);
+ vmx->emulation_required = emulation_required(vcpu);
out:
return ret;
}
@@ -5754,6 +5892,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_VMON] = handle_vmon,
[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
+ [EXIT_REASON_APIC_WRITE] = handle_apic_write,
+ [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
[EXIT_REASON_WBINVD] = handle_wbinvd,
[EXIT_REASON_XSETBV] = handle_xsetbv,
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
@@ -5780,7 +5920,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
gpa_t bitmap;
- if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS))
+ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
return 1;
/*
@@ -6008,7 +6148,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
u32 vectoring_info = vmx->idt_vectoring_info;
/* If guest state is invalid, start emulating */
- if (vmx->emulation_required && emulate_invalid_guest_state)
+ if (vmx->emulation_required)
return handle_invalid_guest_state(vcpu);
/*
@@ -6103,6 +6243,85 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
vmcs_write32(TPR_THRESHOLD, irr);
}
+static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+{
+ u32 sec_exec_control;
+
+ /*
+ * There is not point to enable virtualize x2apic without enable
+ * apicv
+ */
+ if (!cpu_has_vmx_virtualize_x2apic_mode() ||
+ !vmx_vm_has_apicv(vcpu->kvm))
+ return;
+
+ if (!vm_need_tpr_shadow(vcpu->kvm))
+ return;
+
+ sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+
+ if (set) {
+ sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+ } else {
+ sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+ sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ }
+ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
+
+ vmx_set_msr_bitmap(vcpu);
+}
+
+static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
+{
+ u16 status;
+ u8 old;
+
+ if (!vmx_vm_has_apicv(kvm))
+ return;
+
+ if (isr == -1)
+ isr = 0;
+
+ status = vmcs_read16(GUEST_INTR_STATUS);
+ old = status >> 8;
+ if (isr != old) {
+ status &= 0xff;
+ status |= isr << 8;
+ vmcs_write16(GUEST_INTR_STATUS, status);
+ }
+}
+
+static void vmx_set_rvi(int vector)
+{
+ u16 status;
+ u8 old;
+
+ status = vmcs_read16(GUEST_INTR_STATUS);
+ old = (u8)status & 0xff;
+ if ((u8)vector != old) {
+ status &= ~0xff;
+ status |= (u8)vector;
+ vmcs_write16(GUEST_INTR_STATUS, status);
+ }
+}
+
+static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
+{
+ if (max_irr == -1)
+ return;
+
+ vmx_set_rvi(max_irr);
+}
+
+static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+ vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
+ vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
+ vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
+ vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
+}
+
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -6291,7 +6510,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
- if (vmx->emulation_required && emulate_invalid_guest_state)
+ if (vmx->emulation_required)
return;
if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
@@ -7366,6 +7585,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
.enable_nmi_window = enable_nmi_window,
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
+ .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+ .vm_has_apicv = vmx_vm_has_apicv,
+ .load_eoi_exitmap = vmx_load_eoi_exitmap,
+ .hwapic_irr_update = vmx_hwapic_irr_update,
+ .hwapic_isr_update = vmx_hwapic_isr_update,
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
@@ -7398,7 +7622,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
static int __init vmx_init(void)
{
- int r, i;
+ int r, i, msr;
rdmsrl_safe(MSR_EFER, &host_efer);
@@ -7419,11 +7643,19 @@ static int __init vmx_init(void)
if (!vmx_msr_bitmap_legacy)
goto out1;
+ vmx_msr_bitmap_legacy_x2apic =
+ (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_msr_bitmap_legacy_x2apic)
+ goto out2;
vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_longmode)
- goto out2;
+ goto out3;
+ vmx_msr_bitmap_longmode_x2apic =
+ (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_msr_bitmap_longmode_x2apic)
+ goto out4;
/*
* Allow direct access to the PC debug port (it is often used for I/O
@@ -7455,6 +7687,28 @@ static int __init vmx_init(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+ memcpy(vmx_msr_bitmap_legacy_x2apic,
+ vmx_msr_bitmap_legacy, PAGE_SIZE);
+ memcpy(vmx_msr_bitmap_longmode_x2apic,
+ vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+ if (enable_apicv_reg_vid) {
+ for (msr = 0x800; msr <= 0x8ff; msr++)
+ vmx_disable_intercept_msr_read_x2apic(msr);
+
+ /* According SDM, in x2apic mode, the whole id reg is used.
+ * But in KVM, it only use the highest eight bits. Need to
+ * intercept it */
+ vmx_enable_intercept_msr_read_x2apic(0x802);
+ /* TMCCT */
+ vmx_enable_intercept_msr_read_x2apic(0x839);
+ /* TPR */
+ vmx_disable_intercept_msr_write_x2apic(0x808);
+ /* EOI */
+ vmx_disable_intercept_msr_write_x2apic(0x80b);
+ /* SELF-IPI */
+ vmx_disable_intercept_msr_write_x2apic(0x83f);
+ }
if (enable_ept) {
kvm_mmu_set_mask_ptes(0ull,
@@ -7468,8 +7722,10 @@ static int __init vmx_init(void)
return 0;
-out3:
+out4:
free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+ free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
out2:
free_page((unsigned long)vmx_msr_bitmap_legacy);
out1:
@@ -7481,6 +7737,8 @@ out:
static void __exit vmx_exit(void)
{
+ free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+ free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
free_page((unsigned long)vmx_msr_bitmap_legacy);
free_page((unsigned long)vmx_msr_bitmap_longmode);
free_page((unsigned long)vmx_io_bitmap_b);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 76f54461f7c..f71500af1f8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -120,7 +120,7 @@ struct kvm_shared_msrs {
};
static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
-static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
+static struct kvm_shared_msrs __percpu *shared_msrs;
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pf_fixed", VCPU_STAT(pf_fixed) },
@@ -191,10 +191,10 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
static void shared_msr_update(unsigned slot, u32 msr)
{
- struct kvm_shared_msrs *smsr;
u64 value;
+ unsigned int cpu = smp_processor_id();
+ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
- smsr = &__get_cpu_var(shared_msrs);
/* only read, and nobody should modify it at this time,
* so don't need lock */
if (slot >= shared_msrs_global.nr) {
@@ -226,7 +226,8 @@ static void kvm_shared_msr_cpu_online(void)
void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
- struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
+ unsigned int cpu = smp_processor_id();
+ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
if (((value ^ smsr->values[slot].curr) & mask) == 0)
return;
@@ -242,7 +243,8 @@ EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
static void drop_user_return_notifiers(void *ignore)
{
- struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
+ unsigned int cpu = smp_processor_id();
+ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
if (smsr->registered)
kvm_on_user_return(&smsr->urn);
@@ -870,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
kvm_x86_ops->set_efer(vcpu, efer);
- vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
-
/* Update reserved bits */
if ((efer ^ old_efer) & EFER_NX)
kvm_mmu_reset_context(vcpu);
@@ -1879,6 +1879,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
u64 data = msr_info->data;
switch (msr) {
+ case MSR_AMD64_NB_CFG:
+ case MSR_IA32_UCODE_REV:
+ case MSR_IA32_UCODE_WRITE:
+ case MSR_VM_HSAVE_PA:
+ case MSR_AMD64_PATCH_LOADER:
+ case MSR_AMD64_BU_CFG2:
+ break;
+
case MSR_EFER:
return set_efer(vcpu, data);
case MSR_K7_HWCR:
@@ -1898,8 +1906,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
}
break;
- case MSR_AMD64_NB_CFG:
- break;
case MSR_IA32_DEBUGCTLMSR:
if (!data) {
/* We support the non-activated case already */
@@ -1912,11 +1918,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
__func__, data);
break;
- case MSR_IA32_UCODE_REV:
- case MSR_IA32_UCODE_WRITE:
- case MSR_VM_HSAVE_PA:
- case MSR_AMD64_PATCH_LOADER:
- break;
case 0x200 ... 0x2ff:
return set_msr_mtrr(vcpu, msr, data);
case MSR_IA32_APICBASE:
@@ -2251,6 +2252,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_K8_INT_PENDING_MSG:
case MSR_AMD64_NB_CFG:
case MSR_FAM10H_MMIO_CONF_BASE:
+ case MSR_AMD64_BU_CFG2:
data = 0;
break;
case MSR_P6_PERFCTR0:
@@ -2518,7 +2520,7 @@ int kvm_dev_ioctl_check_extension(long ext)
r = KVM_MAX_VCPUS;
break;
case KVM_CAP_NR_MEMSLOTS:
- r = KVM_MEMORY_SLOTS;
+ r = KVM_USER_MEM_SLOTS;
break;
case KVM_CAP_PV_MMU: /* obsolete */
r = 0;
@@ -3270,12 +3272,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
return -EINVAL;
mutex_lock(&kvm->slots_lock);
- spin_lock(&kvm->mmu_lock);
kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
- spin_unlock(&kvm->mmu_lock);
mutex_unlock(&kvm->slots_lock);
return 0;
}
@@ -3435,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
mutex_lock(&kvm->slots_lock);
r = -EINVAL;
- if (log->slot >= KVM_MEMORY_SLOTS)
+ if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
memslot = id_to_memslot(kvm->memslots, log->slot);
@@ -4491,8 +4491,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
*selector = var.selector;
- if (var.unusable)
+ if (var.unusable) {
+ memset(desc, 0, sizeof(*desc));
return false;
+ }
if (var.g)
var.limit >>= 12;
@@ -4753,26 +4755,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
return r;
}
-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
+static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
+ bool write_fault_to_shadow_pgtable)
{
- gpa_t gpa;
+ gpa_t gpa = cr2;
pfn_t pfn;
- if (tdp_enabled)
- return false;
-
- /*
- * if emulation was due to access to shadowed page table
- * and it failed try to unshadow page and re-enter the
- * guest to let CPU execute the instruction.
- */
- if (kvm_mmu_unprotect_page_virt(vcpu, gva))
- return true;
-
- gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
+ if (!vcpu->arch.mmu.direct_map) {
+ /*
+ * Write permission should be allowed since only
+ * write access need to be emulated.
+ */
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
- if (gpa == UNMAPPED_GVA)
- return true; /* let cpu generate fault */
+ /*
+ * If the mapping is invalid in guest, let cpu retry
+ * it to generate fault.
+ */
+ if (gpa == UNMAPPED_GVA)
+ return true;
+ }
/*
* Do not retry the unhandleable instruction if it faults on the
@@ -4781,12 +4783,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
* instruction -> ...
*/
pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
- if (!is_error_noslot_pfn(pfn)) {
- kvm_release_pfn_clean(pfn);
+
+ /*
+ * If the instruction failed on the error pfn, it can not be fixed,
+ * report the error to userspace.
+ */
+ if (is_error_noslot_pfn(pfn))
+ return false;
+
+ kvm_release_pfn_clean(pfn);
+
+ /* The instructions are well-emulated on direct mmu. */
+ if (vcpu->arch.mmu.direct_map) {
+ unsigned int indirect_shadow_pages;
+
+ spin_lock(&vcpu->kvm->mmu_lock);
+ indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
+ spin_unlock(&vcpu->kvm->mmu_lock);
+
+ if (indirect_shadow_pages)
+ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+
return true;
}
- return false;
+ /*
+ * if emulation was due to access to shadowed page table
+ * and it failed try to unshadow page and re-enter the
+ * guest to let CPU execute the instruction.
+ */
+ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+
+ /*
+ * If the access faults on its page table, it can not
+ * be fixed by unprotecting shadow page and it should
+ * be reported to userspace.
+ */
+ return !write_fault_to_shadow_pgtable;
}
static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
@@ -4828,7 +4861,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
if (!vcpu->arch.mmu.direct_map)
gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
- kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
return true;
}
@@ -4845,7 +4878,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
int r;
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
bool writeback = true;
+ bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
+ /*
+ * Clear write_fault_to_shadow_pgtable here to ensure it is
+ * never reused.
+ */
+ vcpu->arch.write_fault_to_shadow_pgtable = false;
kvm_clear_exception_queue(vcpu);
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
@@ -4864,7 +4903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
if (r != EMULATION_OK) {
if (emulation_type & EMULTYPE_TRAP_UD)
return EMULATE_FAIL;
- if (reexecute_instruction(vcpu, cr2))
+ if (reexecute_instruction(vcpu, cr2,
+ write_fault_to_spt))
return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
@@ -4894,7 +4934,7 @@ restart:
return EMULATE_DONE;
if (r == EMULATION_FAILED) {
- if (reexecute_instruction(vcpu, cr2))
+ if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
return EMULATE_DONE;
return handle_emulation_failure(vcpu);
@@ -5233,9 +5273,16 @@ int kvm_arch_init(void *opaque)
goto out;
}
+ r = -ENOMEM;
+ shared_msrs = alloc_percpu(struct kvm_shared_msrs);
+ if (!shared_msrs) {
+ printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
+ goto out;
+ }
+
r = kvm_mmu_module_init();
if (r)
- goto out;
+ goto out_free_percpu;
kvm_set_mmio_spte_mask();
kvm_init_msr_list();
@@ -5258,6 +5305,8 @@ int kvm_arch_init(void *opaque)
return 0;
+out_free_percpu:
+ free_percpu(shared_msrs);
out:
return r;
}
@@ -5275,6 +5324,7 @@ void kvm_arch_exit(void)
#endif
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
+ free_percpu(shared_msrs);
}
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -5527,7 +5577,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
vcpu->arch.nmi_injected = true;
kvm_x86_ops->set_nmi(vcpu);
}
- } else if (kvm_cpu_has_interrupt(vcpu)) {
+ } else if (kvm_cpu_has_injectable_intr(vcpu)) {
if (kvm_x86_ops->interrupt_allowed(vcpu)) {
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
false);
@@ -5595,6 +5645,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
#endif
}
+static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
+{
+ u64 eoi_exit_bitmap[4];
+
+ memset(eoi_exit_bitmap, 0, 32);
+
+ kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
+ kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+}
+
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
@@ -5648,6 +5708,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_handle_pmu_event(vcpu);
if (kvm_check_request(KVM_REQ_PMI, vcpu))
kvm_deliver_pmi(vcpu);
+ if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
+ update_eoi_exitmap(vcpu);
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5656,10 +5718,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
/* enable NMI/IRQ window open exits if needed */
if (vcpu->arch.nmi_pending)
kvm_x86_ops->enable_nmi_window(vcpu);
- else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+ else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
kvm_x86_ops->enable_irq_window(vcpu);
if (kvm_lapic_enabled(vcpu)) {
+ /*
+ * Update architecture specific hints for APIC
+ * virtual interrupt delivery.
+ */
+ if (kvm_x86_ops->hwapic_irr_update)
+ kvm_x86_ops->hwapic_irr_update(vcpu,
+ kvm_lapic_find_highest_irr(vcpu));
update_cr8_intercept(vcpu);
kvm_lapic_sync_to_vapic(vcpu);
}
@@ -6839,48 +6908,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
- int user_alloc)
+ bool user_alloc)
{
int npages = memslot->npages;
- int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
-
- /* Prevent internal slot pages from being moved by fork()/COW. */
- if (memslot->id >= KVM_MEMORY_SLOTS)
- map_flags = MAP_SHARED | MAP_ANONYMOUS;
- /*To keep backward compatibility with older userspace,
- *x86 needs to handle !user_alloc case.
+ /*
+ * Only private memory slots need to be mapped here since
+ * KVM_SET_MEMORY_REGION ioctl is no longer supported.
*/
- if (!user_alloc) {
- if (npages && !old.npages) {
- unsigned long userspace_addr;
+ if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
+ unsigned long userspace_addr;
- userspace_addr = vm_mmap(NULL, 0,
- npages * PAGE_SIZE,
- PROT_READ | PROT_WRITE,
- map_flags,
- 0);
+ /*
+ * MAP_SHARED to prevent internal slot pages from being moved
+ * by fork()/COW.
+ */
+ userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, 0);
- if (IS_ERR((void *)userspace_addr))
- return PTR_ERR((void *)userspace_addr);
+ if (IS_ERR((void *)userspace_addr))
+ return PTR_ERR((void *)userspace_addr);
- memslot->userspace_addr = userspace_addr;
- }
+ memslot->userspace_addr = userspace_addr;
}
-
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
- int user_alloc)
+ bool user_alloc)
{
int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
- if (!user_alloc && !old.user_alloc && old.npages && !npages) {
+ if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
int ret;
ret = vm_munmap(old.userspace_addr,
@@ -6894,11 +6958,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
if (!kvm->arch.n_requested_mmu_pages)
nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
- spin_lock(&kvm->mmu_lock);
if (nr_mmu_pages)
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
- kvm_mmu_slot_remove_write_access(kvm, mem->slot);
- spin_unlock(&kvm->mmu_lock);
+ /*
+ * Write protect all pages for dirty logging.
+ * Existing largepage mappings are destroyed here and new ones will
+ * not be created until the end of the logging.
+ */
+ if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+ kvm_mmu_slot_remove_write_access(kvm, mem->slot);
/*
* If memory slot is created, or moved, we need to clear all
* mmio sptes.
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 7872a3330fb..29043d2048a 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -2,6 +2,7 @@ config LGUEST_GUEST
bool "Lguest guest support"
select PARAVIRT
depends on X86_32
+ select TTY
select VIRTUALIZATION
select VIRTIO
select VIRTIO_CONSOLE
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index df4176cdbb3..1cbd89ca556 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -552,7 +552,8 @@ static void lguest_write_cr3(unsigned long cr3)
current_cr3 = cr3;
/* These two page tables are simple, linear, and used during boot */
- if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
+ if (cr3 != __pa_symbol(swapper_pg_dir) &&
+ cr3 != __pa_symbol(initial_page_table))
cr3_changed = true;
}
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index e395693abdb..7c3bee636e2 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -98,7 +98,7 @@ void use_tsc_delay(void)
delay_fn = delay_tsc;
}
-int __devinit read_current_timer(unsigned long *timer_val)
+int read_current_timer(unsigned long *timer_val)
{
if (delay_fn == delay_tsc) {
rdtscll(*timer_val);
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 156b9c80467..a4512359656 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -15,11 +15,10 @@
* __get_user_X
*
* Inputs: %[r|e]ax contains the address.
- * The register is modified, but all changes are undone
- * before returning because the C code doesn't know about it.
*
* Outputs: %[r|e]ax is error code (0 or -EFAULT)
* %[r|e]dx contains zero-extended value
+ * %ecx contains the high half for 32-bit __get_user_8
*
*
* These functions should not modify any other registers,
@@ -42,7 +41,7 @@ ENTRY(__get_user_1)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
ASM_STAC
-1: movzb (%_ASM_AX),%edx
+1: movzbl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
ret
@@ -72,29 +71,42 @@ ENTRY(__get_user_4)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
ASM_STAC
-3: mov -3(%_ASM_AX),%edx
+3: movl -3(%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
ret
CFI_ENDPROC
ENDPROC(__get_user_4)
-#ifdef CONFIG_X86_64
ENTRY(__get_user_8)
CFI_STARTPROC
+#ifdef CONFIG_X86_64
add $7,%_ASM_AX
jc bad_get_user
GET_THREAD_INFO(%_ASM_DX)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
- jae bad_get_user
+ jae bad_get_user
ASM_STAC
-4: movq -7(%_ASM_AX),%_ASM_DX
+4: movq -7(%_ASM_AX),%rdx
xor %eax,%eax
ASM_CLAC
ret
+#else
+ add $7,%_ASM_AX
+ jc bad_get_user_8
+ GET_THREAD_INFO(%_ASM_DX)
+ cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+ jae bad_get_user_8
+ ASM_STAC
+4: movl -7(%_ASM_AX),%edx
+5: movl -3(%_ASM_AX),%ecx
+ xor %eax,%eax
+ ASM_CLAC
+ ret
+#endif
CFI_ENDPROC
ENDPROC(__get_user_8)
-#endif
+
bad_get_user:
CFI_STARTPROC
@@ -105,9 +117,24 @@ bad_get_user:
CFI_ENDPROC
END(bad_get_user)
+#ifdef CONFIG_X86_32
+bad_get_user_8:
+ CFI_STARTPROC
+ xor %edx,%edx
+ xor %ecx,%ecx
+ mov $(-EFAULT),%_ASM_AX
+ ASM_CLAC
+ ret
+ CFI_ENDPROC
+END(bad_get_user_8)
+#endif
+
_ASM_EXTABLE(1b,bad_get_user)
_ASM_EXTABLE(2b,bad_get_user)
_ASM_EXTABLE(3b,bad_get_user)
#ifdef CONFIG_X86_64
_ASM_EXTABLE(4b,bad_get_user)
+#else
+ _ASM_EXTABLE(4b,bad_get_user_8)
+ _ASM_EXTABLE(5b,bad_get_user_8)
#endif
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 027088f2f7d..fb674fd3fc2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -748,13 +748,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
return;
}
#endif
+ /* Kernel addresses are always protection faults: */
+ if (address >= TASK_SIZE)
+ error_code |= PF_PROT;
- if (unlikely(show_unhandled_signals))
+ if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
- /* Kernel addresses are always protection faults: */
tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+ tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_PF;
force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d7aea41563b..4903a03ae87 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -16,87 +16,134 @@
#include <asm/tlb.h>
#include <asm/proto.h>
#include <asm/dma.h> /* for MAX_DMA_PFN */
+#include <asm/microcode.h>
-unsigned long __initdata pgt_buf_start;
-unsigned long __meminitdata pgt_buf_end;
-unsigned long __meminitdata pgt_buf_top;
+#include "mm_internal.h"
-int after_bootmem;
+static unsigned long __initdata pgt_buf_start;
+static unsigned long __initdata pgt_buf_end;
+static unsigned long __initdata pgt_buf_top;
-int direct_gbpages
-#ifdef CONFIG_DIRECT_GBPAGES
- = 1
-#endif
-;
+static unsigned long min_pfn_mapped;
-struct map_range {
- unsigned long start;
- unsigned long end;
- unsigned page_size_mask;
-};
+static bool __initdata can_use_brk_pgt = true;
/*
- * First calculate space needed for kernel direct mapping page tables to cover
- * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB
- * pages. Then find enough contiguous space for those page tables.
+ * Pages returned are already directly mapped.
+ *
+ * Changing that is likely to break Xen, see commit:
+ *
+ * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve
+ *
+ * for detailed information.
*/
-static void __init find_early_table_space(struct map_range *mr, int nr_range)
+__ref void *alloc_low_pages(unsigned int num)
{
+ unsigned long pfn;
int i;
- unsigned long puds = 0, pmds = 0, ptes = 0, tables;
- unsigned long start = 0, good_end;
- phys_addr_t base;
- for (i = 0; i < nr_range; i++) {
- unsigned long range, extra;
+ if (after_bootmem) {
+ unsigned int order;
- range = mr[i].end - mr[i].start;
- puds += (range + PUD_SIZE - 1) >> PUD_SHIFT;
+ order = get_order((unsigned long)num << PAGE_SHIFT);
+ return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK |
+ __GFP_ZERO, order);
+ }
- if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) {
- extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT);
- pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT;
- } else {
- pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT;
- }
+ if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
+ unsigned long ret;
+ if (min_pfn_mapped >= max_pfn_mapped)
+ panic("alloc_low_page: ran out of memory");
+ ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT,
+ max_pfn_mapped << PAGE_SHIFT,
+ PAGE_SIZE * num , PAGE_SIZE);
+ if (!ret)
+ panic("alloc_low_page: can not alloc memory");
+ memblock_reserve(ret, PAGE_SIZE * num);
+ pfn = ret >> PAGE_SHIFT;
+ } else {
+ pfn = pgt_buf_end;
+ pgt_buf_end += num;
+ printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n",
+ pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1);
+ }
- if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) {
- extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT);
-#ifdef CONFIG_X86_32
- extra += PMD_SIZE;
-#endif
- ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
- } else {
- ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT;
- }
+ for (i = 0; i < num; i++) {
+ void *adr;
+
+ adr = __va((pfn + i) << PAGE_SHIFT);
+ clear_page(adr);
}
- tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
- tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
- tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
+ return __va(pfn << PAGE_SHIFT);
+}
-#ifdef CONFIG_X86_32
- /* for fixmap */
- tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-#endif
- good_end = max_pfn_mapped << PAGE_SHIFT;
+/* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */
+#define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE)
+RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
+void __init early_alloc_pgt_buf(void)
+{
+ unsigned long tables = INIT_PGT_BUF_SIZE;
+ phys_addr_t base;
- base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
- if (!base)
- panic("Cannot find space for the kernel page tables");
+ base = __pa(extend_brk(tables, PAGE_SIZE));
pgt_buf_start = base >> PAGE_SHIFT;
pgt_buf_end = pgt_buf_start;
pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
+}
- printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n",
- mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT,
- (pgt_buf_top << PAGE_SHIFT) - 1);
+int after_bootmem;
+
+int direct_gbpages
+#ifdef CONFIG_DIRECT_GBPAGES
+ = 1
+#endif
+;
+
+static void __init init_gbpages(void)
+{
+#ifdef CONFIG_X86_64
+ if (direct_gbpages && cpu_has_gbpages)
+ printk(KERN_INFO "Using GB pages for direct mapping\n");
+ else
+ direct_gbpages = 0;
+#endif
}
-void __init native_pagetable_reserve(u64 start, u64 end)
+struct map_range {
+ unsigned long start;
+ unsigned long end;
+ unsigned page_size_mask;
+};
+
+static int page_size_mask;
+
+static void __init probe_page_size_mask(void)
{
- memblock_reserve(start, end - start);
+ init_gbpages();
+
+#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
+ /*
+ * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
+ * This will simplify cpa(), which otherwise needs to support splitting
+ * large pages into small in interrupt context, etc.
+ */
+ if (direct_gbpages)
+ page_size_mask |= 1 << PG_LEVEL_1G;
+ if (cpu_has_pse)
+ page_size_mask |= 1 << PG_LEVEL_2M;
+#endif
+
+ /* Enable PSE if available */
+ if (cpu_has_pse)
+ set_in_cr4(X86_CR4_PSE);
+
+ /* Enable PGE if available */
+ if (cpu_has_pge) {
+ set_in_cr4(X86_CR4_PGE);
+ __supported_pte_mask |= _PAGE_GLOBAL;
+ }
}
#ifdef CONFIG_X86_32
@@ -122,58 +169,51 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
}
/*
- * Setup the direct mapping of the physical memory at PAGE_OFFSET.
- * This runs before bootmem is initialized and gets pages directly from
- * the physical memory. To access them they are temporarily mapped.
+ * adjust the page_size_mask for small range to go with
+ * big page size instead small one if nearby are ram too.
*/
-unsigned long __init_refok init_memory_mapping(unsigned long start,
- unsigned long end)
+static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
+ int nr_range)
{
- unsigned long page_size_mask = 0;
- unsigned long start_pfn, end_pfn;
- unsigned long ret = 0;
- unsigned long pos;
-
- struct map_range mr[NR_RANGE_MR];
- int nr_range, i;
- int use_pse, use_gbpages;
+ int i;
- printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n",
- start, end - 1);
+ for (i = 0; i < nr_range; i++) {
+ if ((page_size_mask & (1<<PG_LEVEL_2M)) &&
+ !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) {
+ unsigned long start = round_down(mr[i].start, PMD_SIZE);
+ unsigned long end = round_up(mr[i].end, PMD_SIZE);
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
- /*
- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
- * This will simplify cpa(), which otherwise needs to support splitting
- * large pages into small in interrupt context, etc.
- */
- use_pse = use_gbpages = 0;
-#else
- use_pse = cpu_has_pse;
- use_gbpages = direct_gbpages;
+#ifdef CONFIG_X86_32
+ if ((end >> PAGE_SHIFT) > max_low_pfn)
+ continue;
#endif
- /* Enable PSE if available */
- if (cpu_has_pse)
- set_in_cr4(X86_CR4_PSE);
+ if (memblock_is_region_memory(start, end - start))
+ mr[i].page_size_mask |= 1<<PG_LEVEL_2M;
+ }
+ if ((page_size_mask & (1<<PG_LEVEL_1G)) &&
+ !(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) {
+ unsigned long start = round_down(mr[i].start, PUD_SIZE);
+ unsigned long end = round_up(mr[i].end, PUD_SIZE);
- /* Enable PGE if available */
- if (cpu_has_pge) {
- set_in_cr4(X86_CR4_PGE);
- __supported_pte_mask |= _PAGE_GLOBAL;
+ if (memblock_is_region_memory(start, end - start))
+ mr[i].page_size_mask |= 1<<PG_LEVEL_1G;
+ }
}
+}
- if (use_gbpages)
- page_size_mask |= 1 << PG_LEVEL_1G;
- if (use_pse)
- page_size_mask |= 1 << PG_LEVEL_2M;
+static int __meminit split_mem_range(struct map_range *mr, int nr_range,
+ unsigned long start,
+ unsigned long end)
+{
+ unsigned long start_pfn, end_pfn, limit_pfn;
+ unsigned long pfn;
+ int i;
- memset(mr, 0, sizeof(mr));
- nr_range = 0;
+ limit_pfn = PFN_DOWN(end);
/* head if not big page alignment ? */
- start_pfn = start >> PAGE_SHIFT;
- pos = start_pfn << PAGE_SHIFT;
+ pfn = start_pfn = PFN_DOWN(start);
#ifdef CONFIG_X86_32
/*
* Don't use a large page for the first 2/4MB of memory
@@ -181,66 +221,60 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
* and overlapping MTRRs into large pages can cause
* slowdowns.
*/
- if (pos == 0)
- end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);
+ if (pfn == 0)
+ end_pfn = PFN_DOWN(PMD_SIZE);
else
- end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
+ end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
#else /* CONFIG_X86_64 */
- end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
+ end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
#endif
- if (end_pfn > (end >> PAGE_SHIFT))
- end_pfn = end >> PAGE_SHIFT;
+ if (end_pfn > limit_pfn)
+ end_pfn = limit_pfn;
if (start_pfn < end_pfn) {
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
- pos = end_pfn << PAGE_SHIFT;
+ pfn = end_pfn;
}
/* big page (2M) range */
- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
+ start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
#ifdef CONFIG_X86_32
- end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+ end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
#else /* CONFIG_X86_64 */
- end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
- << (PUD_SHIFT - PAGE_SHIFT);
- if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
- end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
+ end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
+ if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE)))
+ end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
#endif
if (start_pfn < end_pfn) {
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
page_size_mask & (1<<PG_LEVEL_2M));
- pos = end_pfn << PAGE_SHIFT;
+ pfn = end_pfn;
}
#ifdef CONFIG_X86_64
/* big page (1G) range */
- start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
- << (PUD_SHIFT - PAGE_SHIFT);
- end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
+ start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE));
+ end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE));
if (start_pfn < end_pfn) {
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
page_size_mask &
((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
- pos = end_pfn << PAGE_SHIFT;
+ pfn = end_pfn;
}
/* tail is not big page (1G) alignment */
- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
- << (PMD_SHIFT - PAGE_SHIFT);
- end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+ start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));
+ end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));
if (start_pfn < end_pfn) {
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
page_size_mask & (1<<PG_LEVEL_2M));
- pos = end_pfn << PAGE_SHIFT;
+ pfn = end_pfn;
}
#endif
/* tail is not big page (2M) alignment */
- start_pfn = pos>>PAGE_SHIFT;
- end_pfn = end>>PAGE_SHIFT;
+ start_pfn = pfn;
+ end_pfn = limit_pfn;
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
/* try to merge same page size and continuous */
@@ -257,59 +291,169 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
nr_range--;
}
+ if (!after_bootmem)
+ adjust_range_page_size_mask(mr, nr_range);
+
for (i = 0; i < nr_range; i++)
printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
mr[i].start, mr[i].end - 1,
(mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
(mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
- /*
- * Find space for the kernel direct mapping tables.
- *
- * Later we should allocate these tables in the local node of the
- * memory mapped. Unfortunately this is done currently before the
- * nodes are discovered.
- */
- if (!after_bootmem)
- find_early_table_space(mr, nr_range);
+ return nr_range;
+}
+
+struct range pfn_mapped[E820_X_MAX];
+int nr_pfn_mapped;
+
+static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
+{
+ nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX,
+ nr_pfn_mapped, start_pfn, end_pfn);
+ nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_X_MAX);
+
+ max_pfn_mapped = max(max_pfn_mapped, end_pfn);
+
+ if (start_pfn < (1UL<<(32-PAGE_SHIFT)))
+ max_low_pfn_mapped = max(max_low_pfn_mapped,
+ min(end_pfn, 1UL<<(32-PAGE_SHIFT)));
+}
+
+bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
+{
+ int i;
+
+ for (i = 0; i < nr_pfn_mapped; i++)
+ if ((start_pfn >= pfn_mapped[i].start) &&
+ (end_pfn <= pfn_mapped[i].end))
+ return true;
+
+ return false;
+}
+
+/*
+ * Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs before bootmem is initialized and gets pages directly from
+ * the physical memory. To access them they are temporarily mapped.
+ */
+unsigned long __init_refok init_memory_mapping(unsigned long start,
+ unsigned long end)
+{
+ struct map_range mr[NR_RANGE_MR];
+ unsigned long ret = 0;
+ int nr_range, i;
+
+ pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n",
+ start, end - 1);
+
+ memset(mr, 0, sizeof(mr));
+ nr_range = split_mem_range(mr, 0, start, end);
for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
mr[i].page_size_mask);
-#ifdef CONFIG_X86_32
- early_ioremap_page_table_range_init();
+ add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);
- load_cr3(swapper_pg_dir);
-#endif
+ return ret >> PAGE_SHIFT;
+}
- __flush_tlb_all();
+/*
+ * would have hole in the middle or ends, and only ram parts will be mapped.
+ */
+static unsigned long __init init_range_memory_mapping(
+ unsigned long r_start,
+ unsigned long r_end)
+{
+ unsigned long start_pfn, end_pfn;
+ unsigned long mapped_ram_size = 0;
+ int i;
- /*
- * Reserve the kernel pagetable pages we used (pgt_buf_start -
- * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
- * so that they can be reused for other purposes.
- *
- * On native it just means calling memblock_reserve, on Xen it also
- * means marking RW the pagetable pages that we allocated before
- * but that haven't been used.
- *
- * In fact on xen we mark RO the whole range pgt_buf_start -
- * pgt_buf_top, because we have to make sure that when
- * init_memory_mapping reaches the pagetable pages area, it maps
- * RO all the pagetable pages, including the ones that are beyond
- * pgt_buf_end at that time.
- */
- if (!after_bootmem && pgt_buf_end > pgt_buf_start)
- x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
- PFN_PHYS(pgt_buf_end));
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
+ u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end);
+ u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end);
+ if (start >= end)
+ continue;
- if (!after_bootmem)
- early_memtest(start, end);
+ /*
+ * if it is overlapping with brk pgt, we need to
+ * alloc pgt buf from memblock instead.
+ */
+ can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >=
+ min(end, (u64)pgt_buf_top<<PAGE_SHIFT);
+ init_memory_mapping(start, end);
+ mapped_ram_size += end - start;
+ can_use_brk_pgt = true;
+ }
- return ret >> PAGE_SHIFT;
+ return mapped_ram_size;
}
+/* (PUD_SHIFT-PMD_SHIFT)/2 */
+#define STEP_SIZE_SHIFT 5
+void __init init_mem_mapping(void)
+{
+ unsigned long end, real_end, start, last_start;
+ unsigned long step_size;
+ unsigned long addr;
+ unsigned long mapped_ram_size = 0;
+ unsigned long new_mapped_ram_size;
+
+ probe_page_size_mask();
+
+#ifdef CONFIG_X86_64
+ end = max_pfn << PAGE_SHIFT;
+#else
+ end = max_low_pfn << PAGE_SHIFT;
+#endif
+
+ /* the ISA range is always mapped regardless of memory holes */
+ init_memory_mapping(0, ISA_END_ADDRESS);
+
+ /* xen has big range in reserved near end of ram, skip it at first */
+ addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE,
+ PAGE_SIZE);
+ real_end = addr + PMD_SIZE;
+
+ /* step_size need to be small so pgt_buf from BRK could cover it */
+ step_size = PMD_SIZE;
+ max_pfn_mapped = 0; /* will get exact value next */
+ min_pfn_mapped = real_end >> PAGE_SHIFT;
+ last_start = start = real_end;
+ while (last_start > ISA_END_ADDRESS) {
+ if (last_start > step_size) {
+ start = round_down(last_start - 1, step_size);
+ if (start < ISA_END_ADDRESS)
+ start = ISA_END_ADDRESS;
+ } else
+ start = ISA_END_ADDRESS;
+ new_mapped_ram_size = init_range_memory_mapping(start,
+ last_start);
+ last_start = start;
+ min_pfn_mapped = last_start >> PAGE_SHIFT;
+ /* only increase step_size after big range get mapped */
+ if (new_mapped_ram_size > mapped_ram_size)
+ step_size <<= STEP_SIZE_SHIFT;
+ mapped_ram_size += new_mapped_ram_size;
+ }
+
+ if (real_end < end)
+ init_range_memory_mapping(real_end, end);
+
+#ifdef CONFIG_X86_64
+ if (max_pfn > max_low_pfn) {
+ /* can we preseve max_low_pfn ?*/
+ max_low_pfn = max_pfn;
+ }
+#else
+ early_ioremap_page_table_range_init();
+#endif
+
+ load_cr3(swapper_pg_dir);
+ __flush_tlb_all();
+
+ early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
+}
/*
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
@@ -391,6 +535,15 @@ void free_initmem(void)
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
+#ifdef CONFIG_MICROCODE_EARLY
+ /*
+ * Remember, initrd memory may contain microcode or other useful things.
+ * Before we lose initrd mem, we need to find a place to hold them
+ * now that normal virtual memory is enabled.
+ */
+ save_microcode_in_initrd();
+#endif
+
/*
* end could be not aligned, and We can not align that,
* decompresser could be confused by aligned initrd_end
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 745d66b843c..2d19001151d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -53,25 +53,14 @@
#include <asm/page_types.h>
#include <asm/init.h>
+#include "mm_internal.h"
+
unsigned long highstart_pfn, highend_pfn;
static noinline int do_test_wp_bit(void);
bool __read_mostly __vmalloc_start_set = false;
-static __init void *alloc_low_page(void)
-{
- unsigned long pfn = pgt_buf_end++;
- void *adr;
-
- if (pfn >= pgt_buf_top)
- panic("alloc_low_page: ran out of memory");
-
- adr = __va(pfn * PAGE_SIZE);
- clear_page(adr);
- return adr;
-}
-
/*
* Creates a middle page table and puts a pointer to it in the
* given global directory entry. This only returns the gd entry
@@ -84,10 +73,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
#ifdef CONFIG_X86_PAE
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
- if (after_bootmem)
- pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
- else
- pmd_table = (pmd_t *)alloc_low_page();
+ pmd_table = (pmd_t *)alloc_low_page();
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
@@ -109,17 +95,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
- pte_t *page_table = NULL;
-
- if (after_bootmem) {
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
- page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
-#endif
- if (!page_table)
- page_table =
- (pte_t *)alloc_bootmem_pages(PAGE_SIZE);
- } else
- page_table = (pte_t *)alloc_low_page();
+ pte_t *page_table = (pte_t *)alloc_low_page();
paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@ -146,8 +122,39 @@ pte_t * __init populate_extra_pte(unsigned long vaddr)
return one_page_table_init(pmd) + pte_idx;
}
+static unsigned long __init
+page_table_range_init_count(unsigned long start, unsigned long end)
+{
+ unsigned long count = 0;
+#ifdef CONFIG_HIGHMEM
+ int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
+ int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
+ int pgd_idx, pmd_idx;
+ unsigned long vaddr;
+
+ if (pmd_idx_kmap_begin == pmd_idx_kmap_end)
+ return 0;
+
+ vaddr = start;
+ pgd_idx = pgd_index(vaddr);
+
+ for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) {
+ for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
+ pmd_idx++) {
+ if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin &&
+ (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end)
+ count++;
+ vaddr += PMD_SIZE;
+ }
+ pmd_idx = 0;
+ }
+#endif
+ return count;
+}
+
static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
- unsigned long vaddr, pte_t *lastpte)
+ unsigned long vaddr, pte_t *lastpte,
+ void **adr)
{
#ifdef CONFIG_HIGHMEM
/*
@@ -161,16 +168,15 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
if (pmd_idx_kmap_begin != pmd_idx_kmap_end
&& (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
- && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
- && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
- || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
+ && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) {
pte_t *newpte;
int i;
BUG_ON(after_bootmem);
- newpte = alloc_low_page();
+ newpte = *adr;
for (i = 0; i < PTRS_PER_PTE; i++)
set_pte(newpte + i, pte[i]);
+ *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);
paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
@@ -204,6 +210,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
pgd_t *pgd;
pmd_t *pmd;
pte_t *pte = NULL;
+ unsigned long count = page_table_range_init_count(start, end);
+ void *adr = NULL;
+
+ if (count)
+ adr = alloc_low_pages(count);
vaddr = start;
pgd_idx = pgd_index(vaddr);
@@ -216,7 +227,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
pmd++, pmd_idx++) {
pte = page_table_kmap_check(one_page_table_init(pmd),
- pmd, vaddr, pte);
+ pmd, vaddr, pte, &adr);
vaddr += PMD_SIZE;
}
@@ -310,6 +321,7 @@ repeat:
__pgprot(PTE_IDENT_ATTR |
_PAGE_PSE);
+ pfn &= PMD_MASK >> PAGE_SHIFT;
addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
PAGE_OFFSET + PAGE_SIZE-1;
@@ -455,9 +467,14 @@ void __init native_pagetable_init(void)
/*
* Remove any mappings which extend past the end of physical
- * memory from the boot time page table:
+ * memory from the boot time page table.
+ * In virtual address space, we should have at least two pages
+ * from VMALLOC_END to pkmap or fixmap according to VMALLOC_END
+ * definition. And max_low_pfn is set to VMALLOC_END physical
+ * address. If initial memory mapping is doing right job, we
+ * should have pte used near max_low_pfn or one pmd is not present.
*/
- for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
+ for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
pgd = base + pgd_index(va);
if (!pgd_present(*pgd))
@@ -468,10 +485,19 @@ void __init native_pagetable_init(void)
if (!pmd_present(*pmd))
break;
+ /* should not be large page here */
+ if (pmd_large(*pmd)) {
+ pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n",
+ pfn, pmd, __pa(pmd));
+ BUG_ON(1);
+ }
+
pte = pte_offset_kernel(pmd, va);
if (!pte_present(*pte))
break;
+ printk(KERN_DEBUG "clearing pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx pte: %p pte phys: %lx\n",
+ pfn, pmd, __pa(pmd), pte, __pa(pte));
pte_clear(NULL, va, pte);
}
paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
@@ -550,7 +576,7 @@ early_param("highmem", parse_highmem);
* artificially via the highmem=x boot parameter then create
* it:
*/
-void __init lowmem_pfn_init(void)
+static void __init lowmem_pfn_init(void)
{
/* max_low_pfn is 0, we already have early_res support */
max_low_pfn = max_pfn;
@@ -586,7 +612,7 @@ void __init lowmem_pfn_init(void)
* We have more RAM than fits into lowmem - we try to put it into
* highmem, also taking the highmem=x boot parameter into account:
*/
-void __init highmem_pfn_init(void)
+static void __init highmem_pfn_init(void)
{
max_low_pfn = MAXMEM_PFN;
@@ -669,8 +695,6 @@ void __init setup_bootmem_allocator(void)
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
-
- after_bootmem = 1;
}
/*
@@ -753,6 +777,8 @@ void __init mem_init(void)
if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
reservedpages++;
+ after_bootmem = 1;
+
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
@@ -836,6 +862,18 @@ int arch_add_memory(int nid, u64 start, u64 size)
return __add_pages(nid, zone, start_pfn, nr_pages);
}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ struct zone *zone;
+
+ zone = page_zone(pfn_to_page(start_pfn));
+ return __remove_pages(zone, start_pfn, nr_pages);
+}
+#endif
#endif
/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 2ead3c8a4c8..474e28f1081 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -54,6 +54,82 @@
#include <asm/uv/uv.h>
#include <asm/setup.h>
+#include "mm_internal.h"
+
+static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+ unsigned long addr, unsigned long end)
+{
+ addr &= PMD_MASK;
+ for (; addr < end; addr += PMD_SIZE) {
+ pmd_t *pmd = pmd_page + pmd_index(addr);
+
+ if (!pmd_present(*pmd))
+ set_pmd(pmd, __pmd(addr | pmd_flag));
+ }
+}
+static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next) {
+ pud_t *pud = pud_page + pud_index(addr);
+ pmd_t *pmd;
+
+ next = (addr & PUD_MASK) + PUD_SIZE;
+ if (next > end)
+ next = end;
+
+ if (pud_present(*pud)) {
+ pmd = pmd_offset(pud, 0);
+ ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ continue;
+ }
+ pmd = (pmd_t *)info->alloc_pgt_page(info->context);
+ if (!pmd)
+ return -ENOMEM;
+ ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
+
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ int result;
+ int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
+
+ for (; addr < end; addr = next) {
+ pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+ pud_t *pud;
+
+ next = (addr & PGDIR_MASK) + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd, 0);
+ result = ident_pud_init(info, pud, addr, next);
+ if (result)
+ return result;
+ continue;
+ }
+
+ pud = (pud_t *)info->alloc_pgt_page(info->context);
+ if (!pud)
+ return -ENOMEM;
+ result = ident_pud_init(info, pud, addr, next);
+ if (result)
+ return result;
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
+
static int __init parse_direct_gbpages_off(char *arg)
{
direct_gbpages = 0;
@@ -302,10 +378,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
void __init cleanup_highmap(void)
{
unsigned long vaddr = __START_KERNEL_map;
- unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
+ unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE;
unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
pmd_t *pmd = level2_kernel_pgt;
+ /*
+ * Native path, max_pfn_mapped is not set yet.
+ * Xen has valid max_pfn_mapped set in
+ * arch/x86/xen/mmu.c:xen_setup_kernel_pagetable().
+ */
+ if (max_pfn_mapped)
+ vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
+
for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
if (pmd_none(*pmd))
continue;
@@ -314,69 +398,24 @@ void __init cleanup_highmap(void)
}
}
-static __ref void *alloc_low_page(unsigned long *phys)
-{
- unsigned long pfn = pgt_buf_end++;
- void *adr;
-
- if (after_bootmem) {
- adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
- *phys = __pa(adr);
-
- return adr;
- }
-
- if (pfn >= pgt_buf_top)
- panic("alloc_low_page: ran out of memory");
-
- adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
- clear_page(adr);
- *phys = pfn * PAGE_SIZE;
- return adr;
-}
-
-static __ref void *map_low_page(void *virt)
-{
- void *adr;
- unsigned long phys, left;
-
- if (after_bootmem)
- return virt;
-
- phys = __pa(virt);
- left = phys & (PAGE_SIZE - 1);
- adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
- adr = (void *)(((unsigned long)adr) | left);
-
- return adr;
-}
-
-static __ref void unmap_low_page(void *adr)
-{
- if (after_bootmem)
- return;
-
- early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
-}
-
static unsigned long __meminit
phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
pgprot_t prot)
{
- unsigned pages = 0;
+ unsigned long pages = 0, next;
unsigned long last_map_addr = end;
int i;
pte_t *pte = pte_page + pte_index(addr);
- for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
-
+ for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
+ next = (addr & PAGE_MASK) + PAGE_SIZE;
if (addr >= end) {
- if (!after_bootmem) {
- for(; i < PTRS_PER_PTE; i++, pte++)
- set_pte(pte, __pte(0));
- }
- break;
+ if (!after_bootmem &&
+ !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
+ !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
+ set_pte(pte, __pte(0));
+ continue;
}
/*
@@ -414,28 +453,25 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
int i = pmd_index(address);
for (; i < PTRS_PER_PMD; i++, address = next) {
- unsigned long pte_phys;
pmd_t *pmd = pmd_page + pmd_index(address);
pte_t *pte;
pgprot_t new_prot = prot;
+ next = (address & PMD_MASK) + PMD_SIZE;
if (address >= end) {
- if (!after_bootmem) {
- for (; i < PTRS_PER_PMD; i++, pmd++)
- set_pmd(pmd, __pmd(0));
- }
- break;
+ if (!after_bootmem &&
+ !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
+ !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
+ set_pmd(pmd, __pmd(0));
+ continue;
}
- next = (address & PMD_MASK) + PMD_SIZE;
-
if (pmd_val(*pmd)) {
if (!pmd_large(*pmd)) {
spin_lock(&init_mm.page_table_lock);
- pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+ pte = (pte_t *)pmd_page_vaddr(*pmd);
last_map_addr = phys_pte_init(pte, address,
end, prot);
- unmap_low_page(pte);
spin_unlock(&init_mm.page_table_lock);
continue;
}
@@ -464,19 +500,18 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
pages++;
spin_lock(&init_mm.page_table_lock);
set_pte((pte_t *)pmd,
- pfn_pte(address >> PAGE_SHIFT,
+ pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
__pgprot(pgprot_val(prot) | _PAGE_PSE)));
spin_unlock(&init_mm.page_table_lock);
last_map_addr = next;
continue;
}
- pte = alloc_low_page(&pte_phys);
+ pte = alloc_low_page();
last_map_addr = phys_pte_init(pte, address, end, new_prot);
- unmap_low_page(pte);
spin_lock(&init_mm.page_table_lock);
- pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
+ pmd_populate_kernel(&init_mm, pmd, pte);
spin_unlock(&init_mm.page_table_lock);
}
update_page_count(PG_LEVEL_2M, pages);
@@ -492,27 +527,24 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
int i = pud_index(addr);
for (; i < PTRS_PER_PUD; i++, addr = next) {
- unsigned long pmd_phys;
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
pgprot_t prot = PAGE_KERNEL;
- if (addr >= end)
- break;
-
next = (addr & PUD_MASK) + PUD_SIZE;
-
- if (!after_bootmem && !e820_any_mapped(addr, next, 0)) {
- set_pud(pud, __pud(0));
+ if (addr >= end) {
+ if (!after_bootmem &&
+ !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
+ !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
+ set_pud(pud, __pud(0));
continue;
}
if (pud_val(*pud)) {
if (!pud_large(*pud)) {
- pmd = map_low_page(pmd_offset(pud, 0));
+ pmd = pmd_offset(pud, 0);
last_map_addr = phys_pmd_init(pmd, addr, end,
page_size_mask, prot);
- unmap_low_page(pmd);
__flush_tlb_all();
continue;
}
@@ -541,19 +573,19 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
pages++;
spin_lock(&init_mm.page_table_lock);
set_pte((pte_t *)pud,
- pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+ pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
+ PAGE_KERNEL_LARGE));
spin_unlock(&init_mm.page_table_lock);
last_map_addr = next;
continue;
}
- pmd = alloc_low_page(&pmd_phys);
+ pmd = alloc_low_page();
last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
prot);
- unmap_low_page(pmd);
spin_lock(&init_mm.page_table_lock);
- pud_populate(&init_mm, pud, __va(pmd_phys));
+ pud_populate(&init_mm, pud, pmd);
spin_unlock(&init_mm.page_table_lock);
}
__flush_tlb_all();
@@ -578,34 +610,29 @@ kernel_physical_mapping_init(unsigned long start,
for (; start < end; start = next) {
pgd_t *pgd = pgd_offset_k(start);
- unsigned long pud_phys;
pud_t *pud;
- next = (start + PGDIR_SIZE) & PGDIR_MASK;
- if (next > end)
- next = end;
+ next = (start & PGDIR_MASK) + PGDIR_SIZE;
if (pgd_val(*pgd)) {
- pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+ pud = (pud_t *)pgd_page_vaddr(*pgd);
last_map_addr = phys_pud_init(pud, __pa(start),
__pa(end), page_size_mask);
- unmap_low_page(pud);
continue;
}
- pud = alloc_low_page(&pud_phys);
- last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
+ pud = alloc_low_page();
+ last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
page_size_mask);
- unmap_low_page(pud);
spin_lock(&init_mm.page_table_lock);
- pgd_populate(&init_mm, pgd, __va(pud_phys));
+ pgd_populate(&init_mm, pgd, pud);
spin_unlock(&init_mm.page_table_lock);
pgd_changed = true;
}
if (pgd_changed)
- sync_global_pgds(addr, end);
+ sync_global_pgds(addr, end - 1);
__flush_tlb_all();
@@ -664,13 +691,11 @@ int arch_add_memory(int nid, u64 start, u64 size)
{
struct pglist_data *pgdat = NODE_DATA(nid);
struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
- unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
+ unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
- last_mapped_pfn = init_memory_mapping(start, start + size);
- if (last_mapped_pfn > max_pfn_mapped)
- max_pfn_mapped = last_mapped_pfn;
+ init_memory_mapping(start, start + size);
ret = __add_pages(nid, zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);
@@ -682,10 +707,357 @@ int arch_add_memory(int nid, u64 start, u64 size)
}
EXPORT_SYMBOL_GPL(arch_add_memory);
+#define PAGE_INUSE 0xFD
+
+static void __meminit free_pagetable(struct page *page, int order)
+{
+ struct zone *zone;
+ bool bootmem = false;
+ unsigned long magic;
+ unsigned int nr_pages = 1 << order;
+
+ /* bootmem page has reserved flag */
+ if (PageReserved(page)) {
+ __ClearPageReserved(page);
+ bootmem = true;
+
+ magic = (unsigned long)page->lru.next;
+ if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+ while (nr_pages--)
+ put_page_bootmem(page++);
+ } else
+ __free_pages_bootmem(page, order);
+ } else
+ free_pages((unsigned long)page_address(page), order);
+
+ /*
+ * SECTION_INFO pages and MIX_SECTION_INFO pages
+ * are all allocated by bootmem.
+ */
+ if (bootmem) {
+ zone = page_zone(page);
+ zone_span_writelock(zone);
+ zone->present_pages += nr_pages;
+ zone_span_writeunlock(zone);
+ totalram_pages += nr_pages;
+ }
+}
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+ pte_t *pte;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte = pte_start + i;
+ if (pte_val(*pte))
+ return;
+ }
+
+ /* free a pte talbe */
+ free_pagetable(pmd_page(*pmd), 0);
+ spin_lock(&init_mm.page_table_lock);
+ pmd_clear(pmd);
+ spin_unlock(&init_mm.page_table_lock);
+}
+
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+ pmd_t *pmd;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ pmd = pmd_start + i;
+ if (pmd_val(*pmd))
+ return;
+ }
+
+ /* free a pmd talbe */
+ free_pagetable(pud_page(*pud), 0);
+ spin_lock(&init_mm.page_table_lock);
+ pud_clear(pud);
+ spin_unlock(&init_mm.page_table_lock);
+}
+
+/* Return true if pgd is changed, otherwise return false. */
+static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
+{
+ pud_t *pud;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ pud = pud_start + i;
+ if (pud_val(*pud))
+ return false;
+ }
+
+ /* free a pud table */
+ free_pagetable(pgd_page(*pgd), 0);
+ spin_lock(&init_mm.page_table_lock);
+ pgd_clear(pgd);
+ spin_unlock(&init_mm.page_table_lock);
+
+ return true;
+}
+
+static void __meminit
+remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
+ bool direct)
+{
+ unsigned long next, pages = 0;
+ pte_t *pte;
+ void *page_addr;
+ phys_addr_t phys_addr;
+
+ pte = pte_start + pte_index(addr);
+ for (; addr < end; addr = next, pte++) {
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
+ if (next > end)
+ next = end;
+
+ if (!pte_present(*pte))
+ continue;
+
+ /*
+ * We mapped [0,1G) memory as identity mapping when
+ * initializing, in arch/x86/kernel/head_64.S. These
+ * pagetables cannot be removed.
+ */
+ phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
+ if (phys_addr < (phys_addr_t)0x40000000)
+ return;
+
+ if (IS_ALIGNED(addr, PAGE_SIZE) &&
+ IS_ALIGNED(next, PAGE_SIZE)) {
+ /*
+ * Do not free direct mapping pages since they were
+ * freed when offlining, or simplely not in use.
+ */
+ if (!direct)
+ free_pagetable(pte_page(*pte), 0);
+
+ spin_lock(&init_mm.page_table_lock);
+ pte_clear(&init_mm, addr, pte);
+ spin_unlock(&init_mm.page_table_lock);
+
+ /* For non-direct mapping, pages means nothing. */
+ pages++;
+ } else {
+ /*
+ * If we are here, we are freeing vmemmap pages since
+ * direct mapped memory ranges to be freed are aligned.
+ *
+ * If we are not removing the whole page, it means
+ * other page structs in this page are being used and
+ * we canot remove them. So fill the unused page_structs
+ * with 0xFD, and remove the page when it is wholly
+ * filled with 0xFD.
+ */
+ memset((void *)addr, PAGE_INUSE, next - addr);
+
+ page_addr = page_address(pte_page(*pte));
+ if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+ free_pagetable(pte_page(*pte), 0);
+
+ spin_lock(&init_mm.page_table_lock);
+ pte_clear(&init_mm, addr, pte);
+ spin_unlock(&init_mm.page_table_lock);
+ }
+ }
+ }
+
+ /* Call free_pte_table() in remove_pmd_table(). */
+ flush_tlb_all();
+ if (direct)
+ update_page_count(PG_LEVEL_4K, -pages);
+}
+
+static void __meminit
+remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
+ bool direct)
+{
+ unsigned long next, pages = 0;
+ pte_t *pte_base;
+ pmd_t *pmd;
+ void *page_addr;
+
+ pmd = pmd_start + pmd_index(addr);
+ for (; addr < end; addr = next, pmd++) {
+ next = pmd_addr_end(addr, end);
+
+ if (!pmd_present(*pmd))
+ continue;
+
+ if (pmd_large(*pmd)) {
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+ IS_ALIGNED(next, PMD_SIZE)) {
+ if (!direct)
+ free_pagetable(pmd_page(*pmd),
+ get_order(PMD_SIZE));
+
+ spin_lock(&init_mm.page_table_lock);
+ pmd_clear(pmd);
+ spin_unlock(&init_mm.page_table_lock);
+ pages++;
+ } else {
+ /* If here, we are freeing vmemmap pages. */
+ memset((void *)addr, PAGE_INUSE, next - addr);
+
+ page_addr = page_address(pmd_page(*pmd));
+ if (!memchr_inv(page_addr, PAGE_INUSE,
+ PMD_SIZE)) {
+ free_pagetable(pmd_page(*pmd),
+ get_order(PMD_SIZE));
+
+ spin_lock(&init_mm.page_table_lock);
+ pmd_clear(pmd);
+ spin_unlock(&init_mm.page_table_lock);
+ }
+ }
+
+ continue;
+ }
+
+ pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+ remove_pte_table(pte_base, addr, next, direct);
+ free_pte_table(pte_base, pmd);
+ }
+
+ /* Call free_pmd_table() in remove_pud_table(). */
+ if (direct)
+ update_page_count(PG_LEVEL_2M, -pages);
+}
+
+static void __meminit
+remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
+ bool direct)
+{
+ unsigned long next, pages = 0;
+ pmd_t *pmd_base;
+ pud_t *pud;
+ void *page_addr;
+
+ pud = pud_start + pud_index(addr);
+ for (; addr < end; addr = next, pud++) {
+ next = pud_addr_end(addr, end);
+
+ if (!pud_present(*pud))
+ continue;
+
+ if (pud_large(*pud)) {
+ if (IS_ALIGNED(addr, PUD_SIZE) &&
+ IS_ALIGNED(next, PUD_SIZE)) {
+ if (!direct)
+ free_pagetable(pud_page(*pud),
+ get_order(PUD_SIZE));
+
+ spin_lock(&init_mm.page_table_lock);
+ pud_clear(pud);
+ spin_unlock(&init_mm.page_table_lock);
+ pages++;
+ } else {
+ /* If here, we are freeing vmemmap pages. */
+ memset((void *)addr, PAGE_INUSE, next - addr);
+
+ page_addr = page_address(pud_page(*pud));
+ if (!memchr_inv(page_addr, PAGE_INUSE,
+ PUD_SIZE)) {
+ free_pagetable(pud_page(*pud),
+ get_order(PUD_SIZE));
+
+ spin_lock(&init_mm.page_table_lock);
+ pud_clear(pud);
+ spin_unlock(&init_mm.page_table_lock);
+ }
+ }
+
+ continue;
+ }
+
+ pmd_base = (pmd_t *)pud_page_vaddr(*pud);
+ remove_pmd_table(pmd_base, addr, next, direct);
+ free_pmd_table(pmd_base, pud);
+ }
+
+ if (direct)
+ update_page_count(PG_LEVEL_1G, -pages);
+}
+
+/* start and end are both virtual address. */
+static void __meminit
+remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+ unsigned long next;
+ pgd_t *pgd;
+ pud_t *pud;
+ bool pgd_changed = false;
+
+ for (; start < end; start = next) {
+ next = pgd_addr_end(start, end);
+
+ pgd = pgd_offset_k(start);
+ if (!pgd_present(*pgd))
+ continue;
+
+ pud = (pud_t *)pgd_page_vaddr(*pgd);
+ remove_pud_table(pud, start, next, direct);
+ if (free_pud_table(pud, pgd))
+ pgd_changed = true;
+ }
+
+ if (pgd_changed)
+ sync_global_pgds(start, end - 1);
+
+ flush_tlb_all();
+}
+
+void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+ unsigned long start = (unsigned long)memmap;
+ unsigned long end = (unsigned long)(memmap + nr_pages);
+
+ remove_pagetable(start, end, false);
+}
+
+static void __meminit
+kernel_physical_mapping_remove(unsigned long start, unsigned long end)
+{
+ start = (unsigned long)__va(start);
+ end = (unsigned long)__va(end);
+
+ remove_pagetable(start, end, true);
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int __ref arch_remove_memory(u64 start, u64 size)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ struct zone *zone;
+ int ret;
+
+ zone = page_zone(pfn_to_page(start_pfn));
+ kernel_physical_mapping_remove(start, start + size);
+ ret = __remove_pages(zone, start_pfn, nr_pages);
+ WARN_ON_ONCE(ret);
+
+ return ret;
+}
+#endif
#endif /* CONFIG_MEMORY_HOTPLUG */
static struct kcore_list kcore_vsyscall;
+static void __init register_page_bootmem_info(void)
+{
+#ifdef CONFIG_NUMA
+ int i;
+
+ for_each_online_node(i)
+ register_page_bootmem_info_node(NODE_DATA(i));
+#endif
+}
+
void __init mem_init(void)
{
long codesize, reservedpages, datasize, initsize;
@@ -698,11 +1070,8 @@ void __init mem_init(void)
reservedpages = 0;
/* this will put all low memory onto the freelists */
-#ifdef CONFIG_NUMA
- totalram_pages = numa_free_all_bootmem();
-#else
+ register_page_bootmem_info();
totalram_pages = free_all_bootmem();
-#endif
absent_pages = absent_pages_in_range(0, max_pfn);
reservedpages = max_pfn - totalram_pages - absent_pages;
@@ -772,12 +1141,11 @@ void set_kernel_text_ro(void)
void mark_rodata_ro(void)
{
unsigned long start = PFN_ALIGN(_text);
- unsigned long rodata_start =
- ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+ unsigned long rodata_start = PFN_ALIGN(__start_rodata);
unsigned long end = (unsigned long) &__end_rodata_hpage_align;
- unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
- unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
- unsigned long data_start = (unsigned long) &_sdata;
+ unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
+ unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
+ unsigned long all_end = PFN_ALIGN(&_end);
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
@@ -786,10 +1154,10 @@ void mark_rodata_ro(void)
kernel_set_to_readonly = 1;
/*
- * The rodata section (but not the kernel text!) should also be
- * not-executable.
+ * The rodata/data/bss/brk section (but not the kernel text!)
+ * should also be not-executable.
*/
- set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
+ set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
rodata_test();
@@ -802,12 +1170,12 @@ void mark_rodata_ro(void)
#endif
free_init_pages("unused kernel memory",
- (unsigned long) page_address(virt_to_page(text_end)),
- (unsigned long)
- page_address(virt_to_page(rodata_start)));
+ (unsigned long) __va(__pa_symbol(text_end)),
+ (unsigned long) __va(__pa_symbol(rodata_start)));
+
free_init_pages("unused kernel memory",
- (unsigned long) page_address(virt_to_page(rodata_end)),
- (unsigned long) page_address(virt_to_page(data_start)));
+ (unsigned long) __va(__pa_symbol(rodata_end)),
+ (unsigned long) __va(__pa_symbol(_sdata)));
}
#endif
@@ -831,6 +1199,9 @@ int kern_addr_valid(unsigned long addr)
if (pud_none(*pud))
return 0;
+ if (pud_large(*pud))
+ return pfn_valid(pud_pfn(*pud));
+
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
return 0;
@@ -981,10 +1352,70 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
}
}
- sync_global_pgds((unsigned long)start_page, end);
+ sync_global_pgds((unsigned long)start_page, end - 1);
return 0;
}
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
+void register_page_bootmem_memmap(unsigned long section_nr,
+ struct page *start_page, unsigned long size)
+{
+ unsigned long addr = (unsigned long)start_page;
+ unsigned long end = (unsigned long)(start_page + size);
+ unsigned long next;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned int nr_pages;
+ struct page *page;
+
+ for (; addr < end; addr = next) {
+ pte_t *pte = NULL;
+
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd)) {
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
+ continue;
+ }
+ get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
+
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud)) {
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
+ continue;
+ }
+ get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
+
+ if (!cpu_has_pse) {
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ continue;
+ get_page_bootmem(section_nr, pmd_page(*pmd),
+ MIX_SECTION_INFO);
+
+ pte = pte_offset_kernel(pmd, addr);
+ if (pte_none(*pte))
+ continue;
+ get_page_bootmem(section_nr, pte_page(*pte),
+ SECTION_INFO);
+ } else {
+ next = pmd_addr_end(addr, end);
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ continue;
+
+ nr_pages = 1 << (get_order(PMD_SIZE));
+ page = pmd_page(*pmd);
+ while (nr_pages--)
+ get_page_bootmem(section_nr, page++,
+ SECTION_INFO);
+ }
+ }
+}
+#endif
+
void __meminit vmemmap_populate_print_last(void)
{
if (p_start) {
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index c80b9fb9573..8dabbed409e 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -9,6 +9,7 @@
#include <linux/memblock.h>
static u64 patterns[] __initdata = {
+ /* The first entry has to be 0 to leave memtest with zeroed memory */
0,
0xffffffffffffffffULL,
0x5555555555555555ULL,
@@ -110,15 +111,8 @@ void __init early_memtest(unsigned long start, unsigned long end)
return;
printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern);
- for (i = 0; i < memtest_pattern; i++) {
+ for (i = memtest_pattern-1; i < UINT_MAX; --i) {
idx = i % ARRAY_SIZE(patterns);
do_one_pass(patterns[idx], start, end);
}
-
- if (idx > 0) {
- printk(KERN_INFO "early_memtest: wipe out "
- "test pattern from memory\n");
- /* additional test with pattern 0 will do this */
- do_one_pass(0, start, end);
- }
}
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
new file mode 100644
index 00000000000..6b563a11889
--- /dev/null
+++ b/arch/x86/mm/mm_internal.h
@@ -0,0 +1,19 @@
+#ifndef __X86_MM_INTERNAL_H
+#define __X86_MM_INTERNAL_H
+
+void *alloc_low_pages(unsigned int num);
+static inline void *alloc_low_page(void)
+{
+ return alloc_low_pages(1);
+}
+
+void early_ioremap_page_table_range_init(void);
+
+unsigned long kernel_physical_mapping_init(unsigned long start,
+ unsigned long end,
+ unsigned long page_size_mask);
+void zone_sizes_init(void);
+
+extern int after_bootmem;
+
+#endif /* __X86_MM_INTERNAL_H */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 2d125be1bae..dfd30259eb8 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -56,7 +56,7 @@ early_param("numa", numa_setup);
/*
* apicid, cpu, node mappings
*/
-s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+s16 __apicid_to_node[MAX_LOCAL_APIC] = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
@@ -78,7 +78,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-void __cpuinit numa_set_node(int cpu, int node)
+void numa_set_node(int cpu, int node)
{
int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
@@ -101,7 +101,7 @@ void __cpuinit numa_set_node(int cpu, int node)
set_cpu_numa_node(cpu, node);
}
-void __cpuinit numa_clear_node(int cpu)
+void numa_clear_node(int cpu)
{
numa_set_node(cpu, NUMA_NO_NODE);
}
@@ -193,7 +193,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
static void __init setup_node_data(int nid, u64 start, u64 end)
{
const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
- bool remapped = false;
u64 nd_pa;
void *nd;
int tnid;
@@ -205,37 +204,27 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
if (end && (end - start) < NODE_MIN_SIZE)
return;
- /* initialize remap allocator before aligning to ZONE_ALIGN */
- init_alloc_remap(nid, start, end);
-
start = roundup(start, ZONE_ALIGN);
printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
nid, start, end - 1);
/*
- * Allocate node data. Try remap allocator first, node-local
- * memory and then any node. Never allocate in DMA zone.
+ * Allocate node data. Try node-local memory and then any node.
+ * Never allocate in DMA zone.
*/
- nd = alloc_remap(nid, nd_size);
- if (nd) {
- nd_pa = __pa(nd);
- remapped = true;
- } else {
- nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
- if (!nd_pa) {
- pr_err("Cannot find %zu bytes in node %d\n",
- nd_size, nid);
- return;
- }
- nd = __va(nd_pa);
+ nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+ if (!nd_pa) {
+ pr_err("Cannot find %zu bytes in any node\n", nd_size);
+ return;
}
+ nd = __va(nd_pa);
/* report and initialize */
- printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n",
- nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
+ printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n",
+ nd_pa, nd_pa + nd_size - 1);
tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
- if (!remapped && tnid != nid)
+ if (tnid != nid)
printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid);
node_data[nid] = nd;
@@ -571,10 +560,12 @@ static int __init numa_init(int (*init_func)(void))
for (i = 0; i < MAX_LOCAL_APIC; i++)
set_apicid_to_node(i, NUMA_NO_NODE);
- nodes_clear(numa_nodes_parsed);
+ /*
+ * Do not clear numa_nodes_parsed or zero numa_meminfo here, because
+ * SRAT was parsed earlier in early_parse_srat().
+ */
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
- memset(&numa_meminfo, 0, sizeof(numa_meminfo));
WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
numa_reset_distance();
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 534255a36b6..73a6d7395bd 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -73,167 +73,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
extern unsigned long highend_pfn, highstart_pfn;
-#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
-
-static void *node_remap_start_vaddr[MAX_NUMNODES];
-void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-
-/*
- * Remap memory allocator
- */
-static unsigned long node_remap_start_pfn[MAX_NUMNODES];
-static void *node_remap_end_vaddr[MAX_NUMNODES];
-static void *node_remap_alloc_vaddr[MAX_NUMNODES];
-
-/**
- * alloc_remap - Allocate remapped memory
- * @nid: NUMA node to allocate memory from
- * @size: The size of allocation
- *
- * Allocate @size bytes from the remap area of NUMA node @nid. The
- * size of the remap area is predetermined by init_alloc_remap() and
- * only the callers considered there should call this function. For
- * more info, please read the comment on top of init_alloc_remap().
- *
- * The caller must be ready to handle allocation failure from this
- * function and fall back to regular memory allocator in such cases.
- *
- * CONTEXT:
- * Single CPU early boot context.
- *
- * RETURNS:
- * Pointer to the allocated memory on success, %NULL on failure.
- */
-void *alloc_remap(int nid, unsigned long size)
-{
- void *allocation = node_remap_alloc_vaddr[nid];
-
- size = ALIGN(size, L1_CACHE_BYTES);
-
- if (!allocation || (allocation + size) > node_remap_end_vaddr[nid])
- return NULL;
-
- node_remap_alloc_vaddr[nid] += size;
- memset(allocation, 0, size);
-
- return allocation;
-}
-
-#ifdef CONFIG_HIBERNATION
-/**
- * resume_map_numa_kva - add KVA mapping to the temporary page tables created
- * during resume from hibernation
- * @pgd_base - temporary resume page directory
- */
-void resume_map_numa_kva(pgd_t *pgd_base)
-{
- int node;
-
- for_each_online_node(node) {
- unsigned long start_va, start_pfn, nr_pages, pfn;
-
- start_va = (unsigned long)node_remap_start_vaddr[node];
- start_pfn = node_remap_start_pfn[node];
- nr_pages = (node_remap_end_vaddr[node] -
- node_remap_start_vaddr[node]) >> PAGE_SHIFT;
-
- printk(KERN_DEBUG "%s: node %d\n", __func__, node);
-
- for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) {
- unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
- pgd_t *pgd = pgd_base + pgd_index(vaddr);
- pud_t *pud = pud_offset(pgd, vaddr);
- pmd_t *pmd = pmd_offset(pud, vaddr);
-
- set_pmd(pmd, pfn_pmd(start_pfn + pfn,
- PAGE_KERNEL_LARGE_EXEC));
-
- printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
- __func__, vaddr, start_pfn + pfn);
- }
- }
-}
-#endif
-
-/**
- * init_alloc_remap - Initialize remap allocator for a NUMA node
- * @nid: NUMA node to initizlie remap allocator for
- *
- * NUMA nodes may end up without any lowmem. As allocating pgdat and
- * memmap on a different node with lowmem is inefficient, a special
- * remap allocator is implemented which can be used by alloc_remap().
- *
- * For each node, the amount of memory which will be necessary for
- * pgdat and memmap is calculated and two memory areas of the size are
- * allocated - one in the node and the other in lowmem; then, the area
- * in the node is remapped to the lowmem area.
- *
- * As pgdat and memmap must be allocated in lowmem anyway, this
- * doesn't waste lowmem address space; however, the actual lowmem
- * which gets remapped over is wasted. The amount shouldn't be
- * problematic on machines this feature will be used.
- *
- * Initialization failure isn't fatal. alloc_remap() is used
- * opportunistically and the callers will fall back to other memory
- * allocation mechanisms on failure.
- */
-void __init init_alloc_remap(int nid, u64 start, u64 end)
-{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long end_pfn = end >> PAGE_SHIFT;
- unsigned long size, pfn;
- u64 node_pa, remap_pa;
- void *remap_va;
-
- /*
- * The acpi/srat node info can show hot-add memroy zones where
- * memory could be added but not currently present.
- */
- printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
- nid, start_pfn, end_pfn);
-
- /* calculate the necessary space aligned to large page size */
- size = node_memmap_size_bytes(nid, start_pfn, end_pfn);
- size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
- size = ALIGN(size, LARGE_PAGE_BYTES);
-
- /* allocate node memory and the lowmem remap area */
- node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
- if (!node_pa) {
- pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
- size, nid);
- return;
- }
- memblock_reserve(node_pa, size);
-
- remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
- max_low_pfn << PAGE_SHIFT,
- size, LARGE_PAGE_BYTES);
- if (!remap_pa) {
- pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
- size, nid);
- memblock_free(node_pa, size);
- return;
- }
- memblock_reserve(remap_pa, size);
- remap_va = phys_to_virt(remap_pa);
-
- /* perform actual remap */
- for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE)
- set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT),
- (node_pa >> PAGE_SHIFT) + pfn,
- PAGE_KERNEL_LARGE);
-
- /* initialize remap allocator parameters */
- node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
- node_remap_start_vaddr[nid] = remap_va;
- node_remap_end_vaddr[nid] = remap_va + size;
- node_remap_alloc_vaddr[nid] = remap_va;
-
- printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
- nid, node_pa, node_pa + size, remap_va, remap_va + size);
-}
-
void __init initmem_init(void)
{
x86_numa_init();
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 92e27119ee1..9405ffc9150 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -10,16 +10,3 @@ void __init initmem_init(void)
{
x86_numa_init();
}
-
-unsigned long __init numa_free_all_bootmem(void)
-{
- unsigned long pages = 0;
- int i;
-
- for_each_online_node(i)
- pages += free_all_bootmem_node(NODE_DATA(i));
-
- pages += free_low_memory_core_early(MAX_NUMNODES);
-
- return pages;
-}
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
index 7178c3afe05..ad86ec91e64 100644
--- a/arch/x86/mm/numa_internal.h
+++ b/arch/x86/mm/numa_internal.h
@@ -21,12 +21,6 @@ void __init numa_reset_distance(void);
void __init x86_numa_init(void);
-#ifdef CONFIG_X86_64
-static inline void init_alloc_remap(int nid, u64 start, u64 end) { }
-#else
-void __init init_alloc_remap(int nid, u64 start, u64 end);
-#endif
-
#ifdef CONFIG_NUMA_EMU
void __init numa_emulation(struct numa_meminfo *numa_meminfo,
int numa_dist_cnt);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a718e0d2350..ca1f1c2bb7b 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -94,12 +94,12 @@ static inline void split_page_count(int level) { }
static inline unsigned long highmap_start_pfn(void)
{
- return __pa(_text) >> PAGE_SHIFT;
+ return __pa_symbol(_text) >> PAGE_SHIFT;
}
static inline unsigned long highmap_end_pfn(void)
{
- return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
+ return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
}
#endif
@@ -276,8 +276,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
* The .rodata section needs to be read-only. Using the pfn
* catches all aliases.
*/
- if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
- __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
+ if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
+ __pa_symbol(__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
@@ -364,6 +364,37 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
EXPORT_SYMBOL_GPL(lookup_address);
/*
+ * This is necessary because __pa() does not work on some
+ * kinds of memory, like vmalloc() or the alloc_remap()
+ * areas on 32-bit NUMA systems. The percpu areas can
+ * end up in this kind of memory, for instance.
+ *
+ * This could be optimized, but it is only intended to be
+ * used at inititalization time, and keeping it
+ * unoptimized should increase the testing coverage for
+ * the more obscure platforms.
+ */
+phys_addr_t slow_virt_to_phys(void *__virt_addr)
+{
+ unsigned long virt_addr = (unsigned long)__virt_addr;
+ phys_addr_t phys_addr;
+ unsigned long offset;
+ enum pg_level level;
+ unsigned long psize;
+ unsigned long pmask;
+ pte_t *pte;
+
+ pte = lookup_address(virt_addr, &level);
+ BUG_ON(!pte);
+ psize = page_level_size(level);
+ pmask = page_level_mask(level);
+ offset = virt_addr & ~pmask;
+ phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+ return (phys_addr | offset);
+}
+EXPORT_SYMBOL_GPL(slow_virt_to_phys);
+
+/*
* Set the new pmd in all the pgds we know about:
*/
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
@@ -396,7 +427,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot, req_prot;
int i, do_split = 1;
- unsigned int level;
+ enum pg_level level;
if (cpa->force_split)
return 1;
@@ -412,15 +443,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
switch (level) {
case PG_LEVEL_2M:
- psize = PMD_PAGE_SIZE;
- pmask = PMD_PAGE_MASK;
- break;
#ifdef CONFIG_X86_64
case PG_LEVEL_1G:
- psize = PUD_PAGE_SIZE;
- pmask = PUD_PAGE_MASK;
- break;
#endif
+ psize = page_level_size(level);
+ pmask = page_level_mask(level);
+ break;
default:
do_split = -EINVAL;
goto out_unlock;
@@ -501,21 +529,13 @@ out_unlock:
return do_split;
}
-static int split_large_page(pte_t *kpte, unsigned long address)
+int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
{
unsigned long pfn, pfninc = 1;
unsigned int i, level;
- pte_t *pbase, *tmp;
+ pte_t *tmp;
pgprot_t ref_prot;
- struct page *base;
-
- if (!debug_pagealloc)
- spin_unlock(&cpa_lock);
- base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
- if (!debug_pagealloc)
- spin_lock(&cpa_lock);
- if (!base)
- return -ENOMEM;
+ struct page *base = virt_to_page(pbase);
spin_lock(&pgd_lock);
/*
@@ -523,10 +543,11 @@ static int split_large_page(pte_t *kpte, unsigned long address)
* up for us already:
*/
tmp = lookup_address(address, &level);
- if (tmp != kpte)
- goto out_unlock;
+ if (tmp != kpte) {
+ spin_unlock(&pgd_lock);
+ return 1;
+ }
- pbase = (pte_t *)page_address(base);
paravirt_alloc_pte(&init_mm, page_to_pfn(base));
ref_prot = pte_pgprot(pte_clrhuge(*kpte));
/*
@@ -551,16 +572,10 @@ static int split_large_page(pte_t *kpte, unsigned long address)
for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
- if (address >= (unsigned long)__va(0) &&
- address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
+ if (pfn_range_is_mapped(PFN_DOWN(__pa(address)),
+ PFN_DOWN(__pa(address)) + 1))
split_page_count(level);
-#ifdef CONFIG_X86_64
- if (address >= (unsigned long)__va(1UL<<32) &&
- address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
- split_page_count(level);
-#endif
-
/*
* Install the new, split up pagetable.
*
@@ -579,17 +594,27 @@ static int split_large_page(pte_t *kpte, unsigned long address)
* going on.
*/
__flush_tlb_all();
+ spin_unlock(&pgd_lock);
+
+ return 0;
+}
- base = NULL;
+static int split_large_page(pte_t *kpte, unsigned long address)
+{
+ pte_t *pbase;
+ struct page *base;
-out_unlock:
- /*
- * If we dropped out via the lookup_address check under
- * pgd_lock then stick the page back into the pool:
- */
- if (base)
+ if (!debug_pagealloc)
+ spin_unlock(&cpa_lock);
+ base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
+ if (!debug_pagealloc)
+ spin_lock(&cpa_lock);
+ if (!base)
+ return -ENOMEM;
+
+ pbase = (pte_t *)page_address(base);
+ if (__split_large_page(kpte, address, pbase))
__free_page(base);
- spin_unlock(&pgd_lock);
return 0;
}
@@ -729,13 +754,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
unsigned long vaddr;
int ret;
- if (cpa->pfn >= max_pfn_mapped)
+ if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
return 0;
-#ifdef CONFIG_X86_64
- if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
- return 0;
-#endif
/*
* No need to redo, when the primary call touched the direct
* mapping already:
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 0eb572eda40..2610bd93c89 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -560,10 +560,10 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
{
unsigned long id_sz;
- if (base >= __pa(high_memory))
+ if (base > __pa(high_memory-1))
return 0;
- id_sz = (__pa(high_memory) < base + size) ?
+ id_sz = (__pa(high_memory-1) <= base + size) ?
__pa(high_memory) - base :
size;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e27fbf887f3..193350b51f9 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -334,7 +334,12 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
if (changed && dirty) {
*pmdp = entry;
pmd_update_defer(vma->vm_mm, address, pmdp);
- flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ /*
+ * We had a write-protection fault here and changed the pmd
+ * to to more permissive. No need to flush the TLB for that,
+ * #PF is architecturally guaranteed to do that and in the
+ * worst-case we'll generate a spurious fault.
+ */
}
return changed;
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index d2e2735327b..e666cbbb926 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -1,3 +1,4 @@
+#include <linux/bootmem.h>
#include <linux/mmdebug.h>
#include <linux/module.h>
#include <linux/mm.h>
@@ -8,33 +9,54 @@
#ifdef CONFIG_X86_64
+#ifdef CONFIG_DEBUG_VIRTUAL
unsigned long __phys_addr(unsigned long x)
{
- if (x >= __START_KERNEL_map) {
- x -= __START_KERNEL_map;
- VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
- x += phys_base;
+ unsigned long y = x - __START_KERNEL_map;
+
+ /* use the carry flag to determine if x was < __START_KERNEL_map */
+ if (unlikely(x > y)) {
+ x = y + phys_base;
+
+ VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
} else {
- VIRTUAL_BUG_ON(x < PAGE_OFFSET);
- x -= PAGE_OFFSET;
- VIRTUAL_BUG_ON(!phys_addr_valid(x));
+ x = y + (__START_KERNEL_map - PAGE_OFFSET);
+
+ /* carry flag will be set if starting x was >= PAGE_OFFSET */
+ VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x));
}
+
return x;
}
EXPORT_SYMBOL(__phys_addr);
+unsigned long __phys_addr_symbol(unsigned long x)
+{
+ unsigned long y = x - __START_KERNEL_map;
+
+ /* only check upper bounds since lower bounds will trigger carry */
+ VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
+
+ return y + phys_base;
+}
+EXPORT_SYMBOL(__phys_addr_symbol);
+#endif
+
bool __virt_addr_valid(unsigned long x)
{
- if (x >= __START_KERNEL_map) {
- x -= __START_KERNEL_map;
- if (x >= KERNEL_IMAGE_SIZE)
+ unsigned long y = x - __START_KERNEL_map;
+
+ /* use the carry flag to determine if x was < __START_KERNEL_map */
+ if (unlikely(x > y)) {
+ x = y + phys_base;
+
+ if (y >= KERNEL_IMAGE_SIZE)
return false;
- x += phys_base;
} else {
- if (x < PAGE_OFFSET)
- return false;
- x -= PAGE_OFFSET;
- if (!phys_addr_valid(x))
+ x = y + (__START_KERNEL_map - PAGE_OFFSET);
+
+ /* carry flag will be set if starting x was >= PAGE_OFFSET */
+ if ((x > y) || !phys_addr_valid(x))
return false;
}
@@ -47,10 +69,16 @@ EXPORT_SYMBOL(__virt_addr_valid);
#ifdef CONFIG_DEBUG_VIRTUAL
unsigned long __phys_addr(unsigned long x)
{
+ unsigned long phys_addr = x - PAGE_OFFSET;
/* VMALLOC_* aren't constants */
VIRTUAL_BUG_ON(x < PAGE_OFFSET);
VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
- return x - PAGE_OFFSET;
+ /* max_low_pfn is set early, but not _that_ early */
+ if (max_low_pfn) {
+ VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn);
+ BUG_ON(slow_virt_to_phys((void *)x) != phys_addr);
+ }
+ return phys_addr;
}
EXPORT_SYMBOL(__phys_addr);
#endif
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 4ddf497ca65..79836d01f78 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -141,47 +141,167 @@ static inline int save_add_info(void) {return 1;}
static inline int save_add_info(void) {return 0;}
#endif
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+static void __init
+handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
+{
+ int overlap, i;
+ unsigned long start_pfn, end_pfn;
+
+ start_pfn = PFN_DOWN(start);
+ end_pfn = PFN_UP(end);
+
+ /*
+ * For movablemem_map=acpi:
+ *
+ * SRAT: |_____| |_____| |_________| |_________| ......
+ * node id: 0 1 1 2
+ * hotpluggable: n y y n
+ * movablemem_map: |_____| |_________|
+ *
+ * Using movablemem_map, we can prevent memblock from allocating memory
+ * on ZONE_MOVABLE at boot time.
+ *
+ * Before parsing SRAT, memblock has already reserve some memory ranges
+ * for other purposes, such as for kernel image. We cannot prevent
+ * kernel from using these memory, so we need to exclude these memory
+ * even if it is hotpluggable.
+ * Furthermore, to ensure the kernel has enough memory to boot, we make
+ * all the memory on the node which the kernel resides in
+ * un-hotpluggable.
+ */
+ if (hotpluggable && movablemem_map.acpi) {
+ /* Exclude ranges reserved by memblock. */
+ struct memblock_type *rgn = &memblock.reserved;
+
+ for (i = 0; i < rgn->cnt; i++) {
+ if (end <= rgn->regions[i].base ||
+ start >= rgn->regions[i].base +
+ rgn->regions[i].size)
+ continue;
+
+ /*
+ * If the memory range overlaps the memory reserved by
+ * memblock, then the kernel resides in this node.
+ */
+ node_set(node, movablemem_map.numa_nodes_kernel);
+
+ goto out;
+ }
+
+ /*
+ * If the kernel resides in this node, then the whole node
+ * should not be hotpluggable.
+ */
+ if (node_isset(node, movablemem_map.numa_nodes_kernel))
+ goto out;
+
+ insert_movablemem_map(start_pfn, end_pfn);
+
+ /*
+ * numa_nodes_hotplug nodemask represents which nodes are put
+ * into movablemem_map.map[].
+ */
+ node_set(node, movablemem_map.numa_nodes_hotplug);
+ goto out;
+ }
+
+ /*
+ * For movablemem_map=nn[KMG]@ss[KMG]:
+ *
+ * SRAT: |_____| |_____| |_________| |_________| ......
+ * node id: 0 1 1 2
+ * user specified: |__| |___|
+ * movablemem_map: |___| |_________| |______| ......
+ *
+ * Using movablemem_map, we can prevent memblock from allocating memory
+ * on ZONE_MOVABLE at boot time.
+ *
+ * NOTE: In this case, SRAT info will be ingored.
+ */
+ overlap = movablemem_map_overlap(start_pfn, end_pfn);
+ if (overlap >= 0) {
+ /*
+ * If part of this range is in movablemem_map, we need to
+ * add the range after it to extend the range to the end
+ * of the node, because from the min address specified to
+ * the end of the node will be ZONE_MOVABLE.
+ */
+ start_pfn = max(start_pfn,
+ movablemem_map.map[overlap].start_pfn);
+ insert_movablemem_map(start_pfn, end_pfn);
+
+ /*
+ * Set the nodemask, so that if the address range on one node
+ * is not continuse, we can add the subsequent ranges on the
+ * same node into movablemem_map.
+ */
+ node_set(node, movablemem_map.numa_nodes_hotplug);
+ } else {
+ if (node_isset(node, movablemem_map.numa_nodes_hotplug))
+ /*
+ * Insert the range if we already have movable ranges
+ * on the same node.
+ */
+ insert_movablemem_map(start_pfn, end_pfn);
+ }
+out:
+ return;
+}
+#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+static inline void
+handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
+{
+}
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
int __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
u64 start, end;
+ u32 hotpluggable;
int node, pxm;
if (srat_disabled())
- return -1;
- if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
- bad_srat();
- return -1;
- }
+ goto out_err;
+ if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
+ goto out_err_bad_srat;
if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
- return -1;
+ goto out_err;
+ hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
+ if (hotpluggable && !save_add_info())
+ goto out_err;
- if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
- return -1;
start = ma->base_address;
end = start + ma->length;
pxm = ma->proximity_domain;
if (acpi_srat_revision <= 1)
pxm &= 0xff;
+
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains.\n");
- bad_srat();
- return -1;
+ goto out_err_bad_srat;
}
- if (numa_add_memblk(node, start, end) < 0) {
- bad_srat();
- return -1;
- }
+ if (numa_add_memblk(node, start, end) < 0)
+ goto out_err_bad_srat;
node_set(node, numa_nodes_parsed);
- printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
+ printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n",
node, pxm,
- (unsigned long long) start, (unsigned long long) end - 1);
+ (unsigned long long) start, (unsigned long long) end - 1,
+ hotpluggable ? "Hot Pluggable": "");
+
+ handle_movablemem(node, start, end, hotpluggable);
+
return 0;
+out_err_bad_srat:
+ bad_srat();
+out_err:
+ return -1;
}
void __init acpi_numa_arch_fixup(void) {}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 13a6b29e2e5..282375f13c7 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -335,7 +335,7 @@ static const struct file_operations fops_tlbflush = {
.llseek = default_llseek,
};
-static int __cpuinit create_tlb_flushall_shift(void)
+static int __init create_tlb_flushall_shift(void)
{
debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
arch_debugfs_dir, NULL, &fops_tlbflush);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index d11a47099d3..3cbe45381bb 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1,6 +1,6 @@
/* bpf_jit_comp.c : BPF JIT compiler
*
- * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
+ * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -124,6 +124,26 @@ static inline void bpf_flush_icache(void *start, void *end)
#define CHOOSE_LOAD_FUNC(K, func) \
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
+/* Helper to find the offset of pkt_type in sk_buff
+ * We want to make sure its still a 3bit field starting at a byte boundary.
+ */
+#define PKT_TYPE_MAX 7
+static int pkt_type_offset(void)
+{
+ struct sk_buff skb_probe = {
+ .pkt_type = ~0,
+ };
+ char *ct = (char *)&skb_probe;
+ unsigned int off;
+
+ for (off = 0; off < sizeof(struct sk_buff); off++) {
+ if (ct[off] == PKT_TYPE_MAX)
+ return off;
+ }
+ pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
+ return -1;
+}
+
void bpf_jit_compile(struct sk_filter *fp)
{
u8 temp[64];
@@ -216,6 +236,7 @@ void bpf_jit_compile(struct sk_filter *fp)
case BPF_S_ANC_VLAN_TAG:
case BPF_S_ANC_VLAN_TAG_PRESENT:
case BPF_S_ANC_QUEUE:
+ case BPF_S_ANC_PKTTYPE:
case BPF_S_LD_W_ABS:
case BPF_S_LD_H_ABS:
case BPF_S_LD_B_ABS:
@@ -536,6 +557,23 @@ void bpf_jit_compile(struct sk_filter *fp)
EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */
}
break;
+ case BPF_S_ANC_PKTTYPE:
+ {
+ int off = pkt_type_offset();
+
+ if (off < 0)
+ goto out;
+ if (is_imm8(off)) {
+ /* movzbl off8(%rdi),%eax */
+ EMIT4(0x0f, 0xb6, 0x47, off);
+ } else {
+ /* movbl off32(%rdi),%eax */
+ EMIT3(0x0f, 0xb6, 0x87);
+ EMIT(off, 4);
+ }
+ EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and $0x7,%eax */
+ break;
+ }
case BPF_S_LD_W_ABS:
func = CHOOSE_LOAD_FUNC(K, sk_load_word);
common_load: seen |= SEEN_DATAREF;
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 0c01261fe5a..53ea60458e0 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -145,7 +145,7 @@ void __init pci_acpi_crs_quirks(void)
}
#ifdef CONFIG_PCI_MMCONFIG
-static int __devinit check_segment(u16 seg, struct device *dev, char *estr)
+static int check_segment(u16 seg, struct device *dev, char *estr)
{
if (seg) {
dev_err(dev,
@@ -168,9 +168,8 @@ static int __devinit check_segment(u16 seg, struct device *dev, char *estr)
return 0;
}
-static int __devinit setup_mcfg_map(struct pci_root_info *info,
- u16 seg, u8 start, u8 end,
- phys_addr_t addr)
+static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start,
+ u8 end, phys_addr_t addr)
{
int result;
struct device *dev = &info->bridge->dev;
@@ -208,7 +207,7 @@ static void teardown_mcfg_map(struct pci_root_info *info)
}
}
#else
-static int __devinit setup_mcfg_map(struct pci_root_info *info,
+static int setup_mcfg_map(struct pci_root_info *info,
u16 seg, u8 start, u8 end,
phys_addr_t addr)
{
@@ -474,7 +473,7 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
info);
}
-struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
+struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
{
struct acpi_device *device = root->device;
struct pci_root_info *info = NULL;
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index d37e2fec97e..c2735feb250 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -93,8 +93,8 @@ struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max,
return info;
}
-void __devinit update_res(struct pci_root_info *info, resource_size_t start,
- resource_size_t end, unsigned long flags, int merge)
+void update_res(struct pci_root_info *info, resource_size_t start,
+ resource_size_t end, unsigned long flags, int merge)
{
struct resource *res;
struct pci_root_res *root_res;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 1b1dda90a94..ccd0ab3ab89 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -81,14 +81,14 @@ struct pci_ops pci_root_ops = {
*/
DEFINE_RAW_SPINLOCK(pci_config_lock);
-static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d)
+static int can_skip_ioresource_align(const struct dmi_system_id *d)
{
pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident);
return 0;
}
-static const struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitconst = {
+static const struct dmi_system_id can_skip_pciprobe_dmi_table[] = {
/*
* Systems where PCI IO resource ISA alignment can be skipped
* when the ISA enable bit in the bridge control is not set
@@ -125,7 +125,7 @@ void __init dmi_check_skip_isa_align(void)
dmi_check_system(can_skip_pciprobe_dmi_table);
}
-static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+static void pcibios_fixup_device_resources(struct pci_dev *dev)
{
struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];
struct resource *bar_r;
@@ -162,7 +162,7 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
* are examined.
*/
-void __devinit pcibios_fixup_bus(struct pci_bus *b)
+void pcibios_fixup_bus(struct pci_bus *b)
{
struct pci_dev *dev;
@@ -176,7 +176,7 @@ void __devinit pcibios_fixup_bus(struct pci_bus *b)
* on the kernel command line (which was parsed earlier).
*/
-static int __devinit set_bf_sort(const struct dmi_system_id *d)
+static int set_bf_sort(const struct dmi_system_id *d)
{
if (pci_bf_sort == pci_bf_sort_default) {
pci_bf_sort = pci_dmi_bf;
@@ -185,7 +185,7 @@ static int __devinit set_bf_sort(const struct dmi_system_id *d)
return 0;
}
-static void __devinit read_dmi_type_b1(const struct dmi_header *dm,
+static void read_dmi_type_b1(const struct dmi_header *dm,
void *private_data)
{
u8 *d = (u8 *)dm + 4;
@@ -207,7 +207,7 @@ static void __devinit read_dmi_type_b1(const struct dmi_header *dm,
}
}
-static int __devinit find_sort_method(const struct dmi_system_id *d)
+static int find_sort_method(const struct dmi_system_id *d)
{
dmi_walk(read_dmi_type_b1, NULL);
@@ -222,7 +222,7 @@ static int __devinit find_sort_method(const struct dmi_system_id *d)
* Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus)
*/
#ifdef __i386__
-static int __devinit assign_all_busses(const struct dmi_system_id *d)
+static int assign_all_busses(const struct dmi_system_id *d)
{
pci_probe |= PCI_ASSIGN_ALL_BUSSES;
printk(KERN_INFO "%s detected: enabling PCI bus# renumbering"
@@ -231,7 +231,7 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d)
}
#endif
-static int __devinit set_scan_all(const struct dmi_system_id *d)
+static int set_scan_all(const struct dmi_system_id *d)
{
printk(KERN_INFO "PCI: %s detected, enabling pci=pcie_scan_all\n",
d->ident);
@@ -239,7 +239,7 @@ static int __devinit set_scan_all(const struct dmi_system_id *d)
return 0;
}
-static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
+static const struct dmi_system_id pciprobe_dmi_table[] = {
#ifdef __i386__
/*
* Laptops which need pci=assign-busses to see Cardbus cards
@@ -434,7 +434,8 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
.callback = set_scan_all,
.ident = "Stratus/NEC ftServer",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ftServer"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Stratus"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ftServer"),
},
},
{}
@@ -445,7 +446,7 @@ void __init dmi_check_pciprobe(void)
dmi_check_system(pciprobe_dmi_table);
}
-struct pci_bus * __devinit pcibios_scan_root(int busnum)
+struct pci_bus *pcibios_scan_root(int busnum)
{
struct pci_bus *bus = NULL;
@@ -664,7 +665,7 @@ int pci_ext_cfg_avail(void)
return 0;
}
-struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
+struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
{
LIST_HEAD(resources);
struct pci_bus *bus = NULL;
@@ -692,7 +693,7 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops,
return bus;
}
-struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno)
+struct pci_bus *pci_scan_bus_with_sysdata(int busno)
{
return pci_scan_bus_on_node(busno, &pci_root_ops, -1);
}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index af8a224db21..f5809fa2753 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -9,7 +9,7 @@
#include <linux/vgaarb.h>
#include <asm/pci_x86.h>
-static void __devinit pci_fixup_i450nx(struct pci_dev *d)
+static void pci_fixup_i450nx(struct pci_dev *d)
{
/*
* i450NX -- Find and scan all secondary buses on all PXB's.
@@ -34,7 +34,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
-static void __devinit pci_fixup_i450gx(struct pci_dev *d)
+static void pci_fixup_i450gx(struct pci_dev *d)
{
/*
* i450GX and i450KX -- Find and scan all secondary buses.
@@ -48,7 +48,7 @@ static void __devinit pci_fixup_i450gx(struct pci_dev *d)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx);
-static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
+static void pci_fixup_umc_ide(struct pci_dev *d)
{
/*
* UM8886BF IDE controller sets region type bits incorrectly,
@@ -62,7 +62,7 @@ static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide);
-static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
+static void pci_fixup_ncr53c810(struct pci_dev *d)
{
/*
* NCR 53C810 returns class code 0 (at least on some systems).
@@ -75,7 +75,7 @@ static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810);
-static void __devinit pci_fixup_latency(struct pci_dev *d)
+static void pci_fixup_latency(struct pci_dev *d)
{
/*
* SiS 5597 and 5598 chipsets require latency timer set to
@@ -87,7 +87,7 @@ static void __devinit pci_fixup_latency(struct pci_dev *d)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency);
-static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d)
+static void pci_fixup_piix4_acpi(struct pci_dev *d)
{
/*
* PIIX4 ACPI device: hardwired IRQ9
@@ -163,7 +163,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_
* system to PCI bus no matter what are their window settings, so they are
* "transparent" (or subtractive decoding) from programmers point of view.
*/
-static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev)
+static void pci_fixup_transparent_bridge(struct pci_dev *dev)
{
if ((dev->device & 0xff00) == 0x2400)
dev->transparent = 1;
@@ -317,7 +317,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_r
* video device at this point.
*/
-static void __devinit pci_fixup_video(struct pci_dev *pdev)
+static void pci_fixup_video(struct pci_dev *pdev)
{
struct pci_dev *bridge;
struct pci_bus *bus;
@@ -357,7 +357,7 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
-static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {
+static const struct dmi_system_id msi_k8t_dmi_table[] = {
{
.ident = "MSI-K8T-Neo2Fir",
.matches = {
@@ -378,7 +378,7 @@ static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {
* The soundcard is only enabled, if the mainborad is identified
* via DMI-tables and the soundcard is detected to be off.
*/
-static void __devinit pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev)
+static void pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev)
{
unsigned char val;
if (!dmi_check_system(msi_k8t_dmi_table))
@@ -414,7 +414,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
*/
static u16 toshiba_line_size;
-static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] = {
+static const struct dmi_system_id toshiba_ohci1394_dmi_table[] = {
{
.ident = "Toshiba PS5 based laptop",
.matches = {
@@ -439,7 +439,7 @@ static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] =
{ }
};
-static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev)
+static void pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev)
{
if (!dmi_check_system(toshiba_ohci1394_dmi_table))
return; /* only applies to certain Toshibas (so far) */
@@ -450,7 +450,7 @@ static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0x8032,
pci_pre_fixup_toshiba_ohci1394);
-static void __devinit pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev)
+static void pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev)
{
if (!dmi_check_system(toshiba_ohci1394_dmi_table))
return; /* only applies to certain Toshibas (so far) */
@@ -488,7 +488,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY,
* Siemens Nixdorf AG FSC Multiprocessor Interrupt Controller:
* prevent update of the BAR0, which doesn't look like a normal BAR.
*/
-static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev)
+static void pci_siemens_interrupt_controller(struct pci_dev *dev)
{
dev->resource[0].flags |= IORESOURCE_PCI_FIXED;
}
@@ -531,7 +531,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);
*
* Match off the LPC and svid/sdid (older kernels lose the bridge subvendor)
*/
-static void __devinit twinhead_reserve_killing_zone(struct pci_dev *dev)
+static void twinhead_reserve_killing_zone(struct pci_dev *dev)
{
if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) {
pr_info("Reserving memory on Twinhead H12Y\n");
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index a1df191129d..4a2ab9cb365 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -10,7 +10,7 @@
* Discover remaining PCI buses in case there are peer host bridges.
* We use the number of last PCI bus provided by the PCI BIOS.
*/
-static void __devinit pcibios_fixup_peer_bridges(void)
+static void pcibios_fixup_peer_bridges(void)
{
int n;
@@ -34,7 +34,7 @@ int __init pci_legacy_init(void)
return 0;
}
-void __devinit pcibios_scan_specific_bus(int busn)
+void pcibios_scan_specific_bus(int busn)
{
int devfn;
long node;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 704b9ec043d..082e8812971 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -49,7 +49,7 @@ static __init void free_all_mmcfg(void)
pci_mmconfig_remove(cfg);
}
-static __devinit void list_add_sorted(struct pci_mmcfg_region *new)
+static void list_add_sorted(struct pci_mmcfg_region *new)
{
struct pci_mmcfg_region *cfg;
@@ -65,9 +65,8 @@ static __devinit void list_add_sorted(struct pci_mmcfg_region *new)
list_add_tail_rcu(&new->list, &pci_mmcfg_list);
}
-static __devinit struct pci_mmcfg_region *pci_mmconfig_alloc(int segment,
- int start,
- int end, u64 addr)
+static struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
+ int end, u64 addr)
{
struct pci_mmcfg_region *new;
struct resource *res;
@@ -371,8 +370,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
return !list_empty(&pci_mmcfg_list);
}
-static acpi_status __devinit check_mcfg_resource(struct acpi_resource *res,
- void *data)
+static acpi_status check_mcfg_resource(struct acpi_resource *res, void *data)
{
struct resource *mcfg_res = data;
struct acpi_resource_address64 address;
@@ -408,8 +406,8 @@ static acpi_status __devinit check_mcfg_resource(struct acpi_resource *res,
return AE_OK;
}
-static acpi_status __devinit find_mboard_resource(acpi_handle handle, u32 lvl,
- void *context, void **rv)
+static acpi_status find_mboard_resource(acpi_handle handle, u32 lvl,
+ void *context, void **rv)
{
struct resource *mcfg_res = context;
@@ -422,7 +420,7 @@ static acpi_status __devinit find_mboard_resource(acpi_handle handle, u32 lvl,
return AE_OK;
}
-static int __devinit is_acpi_reserved(u64 start, u64 end, unsigned not_used)
+static int is_acpi_reserved(u64 start, u64 end, unsigned not_used)
{
struct resource mcfg_res;
@@ -550,8 +548,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
if (cfg->address < 0xFFFFFFFF)
return 0;
- if (!strcmp(mcfg->header.oem_id, "SGI") ||
- !strcmp(mcfg->header.oem_id, "SGI2"))
+ if (!strncmp(mcfg->header.oem_id, "SGI", 3))
return 0;
if (mcfg->header.revision >= 1) {
@@ -693,9 +690,8 @@ static int __init pci_mmcfg_late_insert_resources(void)
late_initcall(pci_mmcfg_late_insert_resources);
/* Add MMCFG information for host bridges */
-int __devinit pci_mmconfig_insert(struct device *dev,
- u16 seg, u8 start, u8 end,
- phys_addr_t addr)
+int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
+ phys_addr_t addr)
{
int rc;
struct resource *tmp = NULL;
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index db63ac23e3d..5c90975cdf0 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -142,7 +142,7 @@ void __init pci_mmcfg_arch_free(void)
{
}
-int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
+int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
{
return 0;
}
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index d4ebd07c306..bea52496aea 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -95,7 +95,7 @@ const struct pci_raw_ops pci_mmcfg = {
.write = pci_mmcfg_write,
};
-static void __iomem * __devinit mcfg_ioremap(struct pci_mmcfg_region *cfg)
+static void __iomem *mcfg_ioremap(struct pci_mmcfg_region *cfg)
{
void __iomem *addr;
u64 start, size;
@@ -133,7 +133,7 @@ void __init pci_mmcfg_arch_free(void)
pci_mmcfg_arch_unmap(cfg);
}
-int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
+int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
{
cfg->virt = mcfg_ioremap(cfg);
if (!cfg->virt) {
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index e14a2ff708b..6eb18c42a28 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -247,7 +247,7 @@ int __init pci_mrst_init(void)
/* Langwell devices are not true pci devices, they are not subject to 10 ms
* d3 to d0 delay required by pci spec.
*/
-static void __devinit pci_d3delay_fixup(struct pci_dev *dev)
+static void pci_d3delay_fixup(struct pci_dev *dev)
{
/* PCI fixups are effectively decided compile time. If we have a dual
SoC/non-SoC kernel we don't want to mangle d3 on non SoC devices */
@@ -262,7 +262,7 @@ static void __devinit pci_d3delay_fixup(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup);
-static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev)
+static void mrst_power_off_unused_dev(struct pci_dev *dev)
{
pci_set_power_state(dev, PCI_D3hot);
}
@@ -275,7 +275,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev);
/*
* Langwell devices reside at fixed offsets, don't try to move them.
*/
-static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
+static void pci_fixed_bar_fixup(struct pci_dev *dev)
{
unsigned long offset;
u32 size;
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 83e125b95ca..b96b14c250b 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -116,7 +116,7 @@ static const struct pci_raw_ops pci_direct_conf1_mq = {
};
-static void __devinit pci_fixup_i450nx(struct pci_dev *d)
+static void pci_fixup_i450nx(struct pci_dev *d)
{
/*
* i450NX -- Find and scan all secondary buses on all PXB's.
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index da8fe0535ff..c77b24a8b2d 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -124,7 +124,7 @@ static struct {
static int pci_bios_present;
-static int __devinit check_pcibios(void)
+static int check_pcibios(void)
{
u32 signature, eax, ebx, ecx;
u8 status, major_ver, minor_ver, hw_mech;
@@ -312,7 +312,7 @@ static const struct pci_raw_ops pci_bios_access = {
* Try to find PCI BIOS.
*/
-static const struct pci_raw_ops * __devinit pci_find_bios(void)
+static const struct pci_raw_ops *pci_find_bios(void)
{
union bios32 *check;
unsigned char sum;
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 8d874396cb2..01e0231a113 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -2,10 +2,12 @@
obj-y += ce4100/
obj-y += efi/
obj-y += geode/
+obj-y += goldfish/
obj-y += iris/
obj-y += mrst/
obj-y += olpc/
obj-y += scx200/
obj-y += sfi/
+obj-y += ts5500/
obj-y += visws/
obj-y += uv/
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index d9c1b95af17..7145ec63c52 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -11,20 +11,21 @@
* published by the Free Software Foundation.
*/
#include <linux/kernel.h>
+#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/efi-bgrt.h>
struct acpi_table_bgrt *bgrt_tab;
-void *bgrt_image;
-size_t bgrt_image_size;
+void *__initdata bgrt_image;
+size_t __initdata bgrt_image_size;
struct bmp_header {
u16 id;
u32 size;
} __packed;
-void efi_bgrt_init(void)
+void __init efi_bgrt_init(void)
{
acpi_status status;
void __iomem *image;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad4439145f8..70b2a3a305d 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -51,9 +51,6 @@
#define EFI_DEBUG 1
-int efi_enabled;
-EXPORT_SYMBOL(efi_enabled);
-
struct efi __read_mostly efi = {
.mps = EFI_INVALID_TABLE_ADDR,
.acpi = EFI_INVALID_TABLE_ADDR,
@@ -69,19 +66,28 @@ EXPORT_SYMBOL(efi);
struct efi_memory_map memmap;
-bool efi_64bit;
-
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
static inline bool efi_is_native(void)
{
- return IS_ENABLED(CONFIG_X86_64) == efi_64bit;
+ return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
+}
+
+unsigned long x86_efi_facility;
+
+/*
+ * Returns 1 if 'facility' is enabled, 0 otherwise.
+ */
+int efi_enabled(int facility)
+{
+ return test_bit(facility, &x86_efi_facility) != 0;
}
+EXPORT_SYMBOL(efi_enabled);
static int __init setup_noefi(char *arg)
{
- efi_enabled = 0;
+ clear_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
return 0;
}
early_param("noefi", setup_noefi);
@@ -410,8 +416,8 @@ void __init efi_reserve_boot_services(void)
* - Not within any part of the kernel
* - Not the bios reserved area
*/
- if ((start+size >= virt_to_phys(_text)
- && start <= virt_to_phys(_end)) ||
+ if ((start+size >= __pa_symbol(_text)
+ && start <= __pa_symbol(_end)) ||
!e820_all_mapped(start, start+size, E820_RAM) ||
memblock_is_region_reserved(start, size)) {
/* Could not reserve, skip it */
@@ -426,6 +432,7 @@ void __init efi_reserve_boot_services(void)
void __init efi_unmap_memmap(void)
{
+ clear_bit(EFI_MEMMAP, &x86_efi_facility);
if (memmap.map) {
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
memmap.map = NULL;
@@ -460,7 +467,7 @@ void __init efi_free_boot_services(void)
static int __init efi_systab_init(void *phys)
{
- if (efi_64bit) {
+ if (efi_enabled(EFI_64BIT)) {
efi_system_table_64_t *systab64;
u64 tmp = 0;
@@ -552,7 +559,7 @@ static int __init efi_config_init(u64 tables, int nr_tables)
void *config_tables, *tablep;
int i, sz;
- if (efi_64bit)
+ if (efi_enabled(EFI_64BIT))
sz = sizeof(efi_config_table_64_t);
else
sz = sizeof(efi_config_table_32_t);
@@ -572,7 +579,7 @@ static int __init efi_config_init(u64 tables, int nr_tables)
efi_guid_t guid;
unsigned long table;
- if (efi_64bit) {
+ if (efi_enabled(EFI_64BIT)) {
u64 table64;
guid = ((efi_config_table_64_t *)tablep)->guid;
table64 = ((efi_config_table_64_t *)tablep)->table;
@@ -684,7 +691,6 @@ void __init efi_init(void)
if (boot_params.efi_info.efi_systab_hi ||
boot_params.efi_info.efi_memmap_hi) {
pr_info("Table located above 4GB, disabling EFI.\n");
- efi_enabled = 0;
return;
}
efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
@@ -694,10 +700,10 @@ void __init efi_init(void)
((__u64)boot_params.efi_info.efi_systab_hi<<32));
#endif
- if (efi_systab_init(efi_phys.systab)) {
- efi_enabled = 0;
+ if (efi_systab_init(efi_phys.systab))
return;
- }
+
+ set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
/*
* Show what we know for posterity
@@ -715,10 +721,10 @@ void __init efi_init(void)
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff, vendor);
- if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) {
- efi_enabled = 0;
+ if (efi_config_init(efi.systab->tables, efi.systab->nr_tables))
return;
- }
+
+ set_bit(EFI_CONFIG_TABLES, &x86_efi_facility);
/*
* Note: We currently don't support runtime services on an EFI
@@ -727,15 +733,17 @@ void __init efi_init(void)
if (!efi_is_native())
pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
- else if (efi_runtime_init()) {
- efi_enabled = 0;
- return;
+ else {
+ if (efi_runtime_init())
+ return;
+ set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
}
- if (efi_memmap_init()) {
- efi_enabled = 0;
+ if (efi_memmap_init())
return;
- }
+
+ set_bit(EFI_MEMMAP, &x86_efi_facility);
+
#ifdef CONFIG_X86_32
if (efi_is_native()) {
x86_platform.get_wallclock = efi_get_time;
@@ -835,7 +843,7 @@ void __init efi_enter_virtual_mode(void)
efi_memory_desc_t *md, *prev_md = NULL;
efi_status_t status;
unsigned long size;
- u64 end, systab, end_pfn;
+ u64 end, systab, start_pfn, end_pfn;
void *p, *va, *new_memmap = NULL;
int count = 0;
@@ -888,10 +896,9 @@ void __init efi_enter_virtual_mode(void)
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
+ start_pfn = PFN_DOWN(md->phys_addr);
end_pfn = PFN_UP(end);
- if (end_pfn <= max_low_pfn_mapped
- || (end_pfn > (1UL << (32 - PAGE_SHIFT))
- && end_pfn <= max_pfn_mapped)) {
+ if (pfn_range_is_mapped(start_pfn, end_pfn)) {
va = __va(md->phys_addr);
if (!(md->attribute & EFI_MEMORY_WB))
@@ -941,7 +948,7 @@ void __init efi_enter_virtual_mode(void)
*
* Call EFI services through wrapper functions.
*/
- efi.runtime_version = efi_systab.fw_revision;
+ efi.runtime_version = efi_systab.hdr.revision;
efi.get_time = virt_efi_get_time;
efi.set_time = virt_efi_set_time;
efi.get_wakeup_time = virt_efi_get_wakeup_time;
@@ -969,6 +976,9 @@ u32 efi_mem_type(unsigned long phys_addr)
efi_memory_desc_t *md;
void *p;
+ if (!efi_enabled(EFI_MEMMAP))
+ return 0;
+
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
if ((md->phys_addr <= phys_addr) &&
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 95fd505dfeb..2b200386061 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -38,7 +38,7 @@
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
-static pgd_t save_pgd __initdata;
+static pgd_t *save_pgd __initdata;
static unsigned long efi_flags __initdata;
static void __init early_code_mapping_set_exec(int executable)
@@ -61,12 +61,20 @@ static void __init early_code_mapping_set_exec(int executable)
void __init efi_call_phys_prelog(void)
{
unsigned long vaddress;
+ int pgd;
+ int n_pgds;
early_code_mapping_set_exec(1);
local_irq_save(efi_flags);
- vaddress = (unsigned long)__va(0x0UL);
- save_pgd = *pgd_offset_k(0x0UL);
- set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
+
+ n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
+ save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
+
+ for (pgd = 0; pgd < n_pgds; pgd++) {
+ save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
+ vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
+ set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+ }
__flush_tlb_all();
}
@@ -75,7 +83,11 @@ void __init efi_call_phys_epilog(void)
/*
* After the lock is released, the original page table is restored.
*/
- set_pgd(pgd_offset_k(0x0UL), save_pgd);
+ int pgd;
+ int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+ for (pgd = 0; pgd < n_pgds; pgd++)
+ set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
+ kfree(save_pgd);
__flush_tlb_all();
local_irq_restore(efi_flags);
early_code_mapping_set_exec(0);
diff --git a/arch/x86/platform/goldfish/Makefile b/arch/x86/platform/goldfish/Makefile
new file mode 100644
index 00000000000..f030b532fdf
--- /dev/null
+++ b/arch/x86/platform/goldfish/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_GOLDFISH) += goldfish.o
diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c
new file mode 100644
index 00000000000..1693107a518
--- /dev/null
+++ b/arch/x86/platform/goldfish/goldfish.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2007 Google, Inc.
+ * Copyright (C) 2011 Intel, Inc.
+ * Copyright (C) 2013 Intel, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/platform_device.h>
+
+/*
+ * Where in virtual device memory the IO devices (timers, system controllers
+ * and so on)
+ */
+
+#define GOLDFISH_PDEV_BUS_BASE (0xff001000)
+#define GOLDFISH_PDEV_BUS_END (0xff7fffff)
+#define GOLDFISH_PDEV_BUS_IRQ (4)
+
+#define GOLDFISH_TTY_BASE (0x2000)
+
+static struct resource goldfish_pdev_bus_resources[] = {
+ {
+ .start = GOLDFISH_PDEV_BUS_BASE,
+ .end = GOLDFISH_PDEV_BUS_END,
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .start = GOLDFISH_PDEV_BUS_IRQ,
+ .end = GOLDFISH_PDEV_BUS_IRQ,
+ .flags = IORESOURCE_IRQ,
+ }
+};
+
+static int __init goldfish_init(void)
+{
+ platform_device_register_simple("goldfish_pdev_bus", -1,
+ goldfish_pdev_bus_resources, 2);
+ return 0;
+}
+device_initcall(goldfish_init);
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index fd41a9262d6..e31bcd8f2ee 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -782,7 +782,7 @@ BLOCKING_NOTIFIER_HEAD(intel_scu_notifier);
EXPORT_SYMBOL_GPL(intel_scu_notifier);
/* Called by IPC driver */
-void __devinit intel_scu_devices_create(void)
+void intel_scu_devices_create(void)
{
int i;
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index d75582d1aa5..ff0174dda81 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -121,7 +121,7 @@ static const struct platform_suspend_ops xo1_suspend_ops = {
.enter = xo1_power_state_enter,
};
-static int __devinit xo1_pm_probe(struct platform_device *pdev)
+static int xo1_pm_probe(struct platform_device *pdev)
{
struct resource *res;
int err;
@@ -154,7 +154,7 @@ static int __devinit xo1_pm_probe(struct platform_device *pdev)
return 0;
}
-static int __devexit xo1_pm_remove(struct platform_device *pdev)
+static int xo1_pm_remove(struct platform_device *pdev)
{
mfd_cell_disable(pdev);
@@ -173,7 +173,7 @@ static struct platform_driver cs5535_pms_driver = {
.owner = THIS_MODULE,
},
.probe = xo1_pm_probe,
- .remove = __devexit_p(xo1_pm_remove),
+ .remove = xo1_pm_remove,
};
static struct platform_driver cs5535_acpi_driver = {
@@ -182,7 +182,7 @@ static struct platform_driver cs5535_acpi_driver = {
.owner = THIS_MODULE,
},
.probe = xo1_pm_probe,
- .remove = __devexit_p(xo1_pm_remove),
+ .remove = xo1_pm_remove,
};
static int __init xo1_pm_init(void)
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 63d4aa40956..74704be7b1f 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -309,7 +309,7 @@ static int xo1_sci_resume(struct platform_device *pdev)
return 0;
}
-static int __devinit setup_sci_interrupt(struct platform_device *pdev)
+static int setup_sci_interrupt(struct platform_device *pdev)
{
u32 lo, hi;
u32 sts;
@@ -351,7 +351,7 @@ static int __devinit setup_sci_interrupt(struct platform_device *pdev)
return r;
}
-static int __devinit setup_ec_sci(void)
+static int setup_ec_sci(void)
{
int r;
@@ -395,7 +395,7 @@ static void free_ec_sci(void)
gpio_free(OLPC_GPIO_ECSCI);
}
-static int __devinit setup_lid_events(void)
+static int setup_lid_events(void)
{
int r;
@@ -432,7 +432,7 @@ static void free_lid_events(void)
gpio_free(OLPC_GPIO_LID);
}
-static int __devinit setup_power_button(struct platform_device *pdev)
+static int setup_power_button(struct platform_device *pdev)
{
int r;
@@ -463,7 +463,7 @@ static void free_power_button(void)
input_free_device(power_button_idev);
}
-static int __devinit setup_ebook_switch(struct platform_device *pdev)
+static int setup_ebook_switch(struct platform_device *pdev)
{
int r;
@@ -494,7 +494,7 @@ static void free_ebook_switch(void)
input_free_device(ebook_switch_idev);
}
-static int __devinit setup_lid_switch(struct platform_device *pdev)
+static int setup_lid_switch(struct platform_device *pdev)
{
int r;
@@ -538,7 +538,7 @@ static void free_lid_switch(void)
input_free_device(lid_switch_idev);
}
-static int __devinit xo1_sci_probe(struct platform_device *pdev)
+static int xo1_sci_probe(struct platform_device *pdev)
{
struct resource *res;
int r;
@@ -613,7 +613,7 @@ err_ebook:
return r;
}
-static int __devexit xo1_sci_remove(struct platform_device *pdev)
+static int xo1_sci_remove(struct platform_device *pdev)
{
mfd_cell_disable(pdev);
free_irq(sci_irq, pdev);
@@ -632,7 +632,7 @@ static struct platform_driver xo1_sci_driver = {
.name = "olpc-xo1-sci-acpi",
},
.probe = xo1_sci_probe,
- .remove = __devexit_p(xo1_sci_remove),
+ .remove = xo1_sci_remove,
.suspend = xo1_sci_suspend,
.resume = xo1_sci_resume,
};
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 2fdca25905a..fef7d0ba7e3 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -195,7 +195,7 @@ err_sysfs:
return r;
}
-static int xo15_sci_remove(struct acpi_device *device, int type)
+static int xo15_sci_remove(struct acpi_device *device)
{
acpi_disable_gpe(NULL, xo15_sci_gpe);
acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c
index 7a9ad30d6c9..3dc9aee41d9 100644
--- a/arch/x86/platform/scx200/scx200_32.c
+++ b/arch/x86/platform/scx200/scx200_32.c
@@ -35,7 +35,7 @@ static struct pci_device_id scx200_tbl[] = {
};
MODULE_DEVICE_TABLE(pci,scx200_tbl);
-static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *);
+static int scx200_probe(struct pci_dev *, const struct pci_device_id *);
static struct pci_driver scx200_pci_driver = {
.name = "scx200",
@@ -45,7 +45,7 @@ static struct pci_driver scx200_pci_driver = {
static DEFINE_MUTEX(scx200_gpio_config_lock);
-static void __devinit scx200_init_shadow(void)
+static void scx200_init_shadow(void)
{
int bank;
@@ -54,7 +54,7 @@ static void __devinit scx200_init_shadow(void)
scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
}
-static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+static int scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
unsigned base;
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index 7785b72ecc3..bcd1a703e3e 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -35,7 +35,7 @@
static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
/* All CPUs enumerated by SFI must be present and enabled */
-static void __cpuinit mp_sfi_register_lapic(u8 id)
+static void __init mp_sfi_register_lapic(u8 id)
{
if (MAX_LOCAL_APIC - id <= 0) {
pr_warning("Processor #%d invalid (max %d)\n",
diff --git a/arch/x86/platform/ts5500/Makefile b/arch/x86/platform/ts5500/Makefile
new file mode 100644
index 00000000000..c54e348c96a
--- /dev/null
+++ b/arch/x86/platform/ts5500/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_TS5500) += ts5500.o
diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c
new file mode 100644
index 00000000000..39febb214e8
--- /dev/null
+++ b/arch/x86/platform/ts5500/ts5500.c
@@ -0,0 +1,339 @@
+/*
+ * Technologic Systems TS-5500 Single Board Computer support
+ *
+ * Copyright (C) 2013 Savoir-faire Linux Inc.
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ *
+ * This driver registers the Technologic Systems TS-5500 Single Board Computer
+ * (SBC) and its devices, and exposes information to userspace such as jumpers'
+ * state or available options. For further information about sysfs entries, see
+ * Documentation/ABI/testing/sysfs-platform-ts5500.
+ *
+ * This code actually supports the TS-5500 platform, but it may be extended to
+ * support similar Technologic Systems x86-based platforms, such as the TS-5600.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/platform_data/gpio-ts5500.h>
+#include <linux/platform_data/max197.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/* Product code register */
+#define TS5500_PRODUCT_CODE_ADDR 0x74
+#define TS5500_PRODUCT_CODE 0x60 /* TS-5500 product code */
+
+/* SRAM/RS-485/ADC options, and RS-485 RTS/Automatic RS-485 flags register */
+#define TS5500_SRAM_RS485_ADC_ADDR 0x75
+#define TS5500_SRAM BIT(0) /* SRAM option */
+#define TS5500_RS485 BIT(1) /* RS-485 option */
+#define TS5500_ADC BIT(2) /* A/D converter option */
+#define TS5500_RS485_RTS BIT(6) /* RTS for RS-485 */
+#define TS5500_RS485_AUTO BIT(7) /* Automatic RS-485 */
+
+/* External Reset/Industrial Temperature Range options register */
+#define TS5500_ERESET_ITR_ADDR 0x76
+#define TS5500_ERESET BIT(0) /* External Reset option */
+#define TS5500_ITR BIT(1) /* Indust. Temp. Range option */
+
+/* LED/Jumpers register */
+#define TS5500_LED_JP_ADDR 0x77
+#define TS5500_LED BIT(0) /* LED flag */
+#define TS5500_JP1 BIT(1) /* Automatic CMOS */
+#define TS5500_JP2 BIT(2) /* Enable Serial Console */
+#define TS5500_JP3 BIT(3) /* Write Enable Drive A */
+#define TS5500_JP4 BIT(4) /* Fast Console (115K baud) */
+#define TS5500_JP5 BIT(5) /* User Jumper */
+#define TS5500_JP6 BIT(6) /* Console on COM1 (req. JP2) */
+#define TS5500_JP7 BIT(7) /* Undocumented (Unused) */
+
+/* A/D Converter registers */
+#define TS5500_ADC_CONV_BUSY_ADDR 0x195 /* Conversion state register */
+#define TS5500_ADC_CONV_BUSY BIT(0)
+#define TS5500_ADC_CONV_INIT_LSB_ADDR 0x196 /* Start conv. / LSB register */
+#define TS5500_ADC_CONV_MSB_ADDR 0x197 /* MSB register */
+#define TS5500_ADC_CONV_DELAY 12 /* usec */
+
+/**
+ * struct ts5500_sbc - TS-5500 board description
+ * @id: Board product ID.
+ * @sram: Flag for SRAM option.
+ * @rs485: Flag for RS-485 option.
+ * @adc: Flag for Analog/Digital converter option.
+ * @ereset: Flag for External Reset option.
+ * @itr: Flag for Industrial Temperature Range option.
+ * @jumpers: Bitfield for jumpers' state.
+ */
+struct ts5500_sbc {
+ int id;
+ bool sram;
+ bool rs485;
+ bool adc;
+ bool ereset;
+ bool itr;
+ u8 jumpers;
+};
+
+/* Board signatures in BIOS shadow RAM */
+static const struct {
+ const char * const string;
+ const ssize_t offset;
+} ts5500_signatures[] __initdata = {
+ { "TS-5x00 AMD Elan", 0xb14 },
+};
+
+static int __init ts5500_check_signature(void)
+{
+ void __iomem *bios;
+ int i, ret = -ENODEV;
+
+ bios = ioremap(0xf0000, 0x10000);
+ if (!bios)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(ts5500_signatures); i++) {
+ if (check_signature(bios + ts5500_signatures[i].offset,
+ ts5500_signatures[i].string,
+ strlen(ts5500_signatures[i].string))) {
+ ret = 0;
+ break;
+ }
+ }
+
+ iounmap(bios);
+ return ret;
+}
+
+static int __init ts5500_detect_config(struct ts5500_sbc *sbc)
+{
+ u8 tmp;
+ int ret = 0;
+
+ if (!request_region(TS5500_PRODUCT_CODE_ADDR, 4, "ts5500"))
+ return -EBUSY;
+
+ tmp = inb(TS5500_PRODUCT_CODE_ADDR);
+ if (tmp != TS5500_PRODUCT_CODE) {
+ pr_err("This platform is not a TS-5500 (found ID 0x%x)\n", tmp);
+ ret = -ENODEV;
+ goto cleanup;
+ }
+ sbc->id = tmp;
+
+ tmp = inb(TS5500_SRAM_RS485_ADC_ADDR);
+ sbc->sram = tmp & TS5500_SRAM;
+ sbc->rs485 = tmp & TS5500_RS485;
+ sbc->adc = tmp & TS5500_ADC;
+
+ tmp = inb(TS5500_ERESET_ITR_ADDR);
+ sbc->ereset = tmp & TS5500_ERESET;
+ sbc->itr = tmp & TS5500_ITR;
+
+ tmp = inb(TS5500_LED_JP_ADDR);
+ sbc->jumpers = tmp & ~TS5500_LED;
+
+cleanup:
+ release_region(TS5500_PRODUCT_CODE_ADDR, 4);
+ return ret;
+}
+
+static ssize_t ts5500_show_id(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ts5500_sbc *sbc = dev_get_drvdata(dev);
+
+ return sprintf(buf, "0x%.2x\n", sbc->id);
+}
+
+static ssize_t ts5500_show_jumpers(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ts5500_sbc *sbc = dev_get_drvdata(dev);
+
+ return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1);
+}
+
+#define TS5500_SHOW(field) \
+ static ssize_t ts5500_show_##field(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+ { \
+ struct ts5500_sbc *sbc = dev_get_drvdata(dev); \
+ return sprintf(buf, "%d\n", sbc->field); \
+ }
+
+TS5500_SHOW(sram)
+TS5500_SHOW(rs485)
+TS5500_SHOW(adc)
+TS5500_SHOW(ereset)
+TS5500_SHOW(itr)
+
+static DEVICE_ATTR(id, S_IRUGO, ts5500_show_id, NULL);
+static DEVICE_ATTR(jumpers, S_IRUGO, ts5500_show_jumpers, NULL);
+static DEVICE_ATTR(sram, S_IRUGO, ts5500_show_sram, NULL);
+static DEVICE_ATTR(rs485, S_IRUGO, ts5500_show_rs485, NULL);
+static DEVICE_ATTR(adc, S_IRUGO, ts5500_show_adc, NULL);
+static DEVICE_ATTR(ereset, S_IRUGO, ts5500_show_ereset, NULL);
+static DEVICE_ATTR(itr, S_IRUGO, ts5500_show_itr, NULL);
+
+static struct attribute *ts5500_attributes[] = {
+ &dev_attr_id.attr,
+ &dev_attr_jumpers.attr,
+ &dev_attr_sram.attr,
+ &dev_attr_rs485.attr,
+ &dev_attr_adc.attr,
+ &dev_attr_ereset.attr,
+ &dev_attr_itr.attr,
+ NULL
+};
+
+static const struct attribute_group ts5500_attr_group = {
+ .attrs = ts5500_attributes,
+};
+
+static struct resource ts5500_dio1_resource[] = {
+ DEFINE_RES_IRQ_NAMED(7, "DIO1 interrupt"),
+};
+
+static struct platform_device ts5500_dio1_pdev = {
+ .name = "ts5500-dio1",
+ .id = -1,
+ .resource = ts5500_dio1_resource,
+ .num_resources = 1,
+};
+
+static struct resource ts5500_dio2_resource[] = {
+ DEFINE_RES_IRQ_NAMED(6, "DIO2 interrupt"),
+};
+
+static struct platform_device ts5500_dio2_pdev = {
+ .name = "ts5500-dio2",
+ .id = -1,
+ .resource = ts5500_dio2_resource,
+ .num_resources = 1,
+};
+
+static void ts5500_led_set(struct led_classdev *led_cdev,
+ enum led_brightness brightness)
+{
+ outb(!!brightness, TS5500_LED_JP_ADDR);
+}
+
+static enum led_brightness ts5500_led_get(struct led_classdev *led_cdev)
+{
+ return (inb(TS5500_LED_JP_ADDR) & TS5500_LED) ? LED_FULL : LED_OFF;
+}
+
+static struct led_classdev ts5500_led_cdev = {
+ .name = "ts5500:green:",
+ .brightness_set = ts5500_led_set,
+ .brightness_get = ts5500_led_get,
+};
+
+static int ts5500_adc_convert(u8 ctrl)
+{
+ u8 lsb, msb;
+
+ /* Start conversion (ensure the 3 MSB are set to 0) */
+ outb(ctrl & 0x1f, TS5500_ADC_CONV_INIT_LSB_ADDR);
+
+ /*
+ * The platform has CPLD logic driving the A/D converter.
+ * The conversion must complete within 11 microseconds,
+ * otherwise we have to re-initiate a conversion.
+ */
+ udelay(TS5500_ADC_CONV_DELAY);
+ if (inb(TS5500_ADC_CONV_BUSY_ADDR) & TS5500_ADC_CONV_BUSY)
+ return -EBUSY;
+
+ /* Read the raw data */
+ lsb = inb(TS5500_ADC_CONV_INIT_LSB_ADDR);
+ msb = inb(TS5500_ADC_CONV_MSB_ADDR);
+
+ return (msb << 8) | lsb;
+}
+
+static struct max197_platform_data ts5500_adc_pdata = {
+ .convert = ts5500_adc_convert,
+};
+
+static struct platform_device ts5500_adc_pdev = {
+ .name = "max197",
+ .id = -1,
+ .dev = {
+ .platform_data = &ts5500_adc_pdata,
+ },
+};
+
+static int __init ts5500_init(void)
+{
+ struct platform_device *pdev;
+ struct ts5500_sbc *sbc;
+ int err;
+
+ /*
+ * There is no DMI available or PCI bridge subvendor info,
+ * only the BIOS provides a 16-bit identification call.
+ * It is safer to find a signature in the BIOS shadow RAM.
+ */
+ err = ts5500_check_signature();
+ if (err)
+ return err;
+
+ pdev = platform_device_register_simple("ts5500", -1, NULL, 0);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
+
+ sbc = devm_kzalloc(&pdev->dev, sizeof(struct ts5500_sbc), GFP_KERNEL);
+ if (!sbc) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ err = ts5500_detect_config(sbc);
+ if (err)
+ goto error;
+
+ platform_set_drvdata(pdev, sbc);
+
+ err = sysfs_create_group(&pdev->dev.kobj, &ts5500_attr_group);
+ if (err)
+ goto error;
+
+ ts5500_dio1_pdev.dev.parent = &pdev->dev;
+ if (platform_device_register(&ts5500_dio1_pdev))
+ dev_warn(&pdev->dev, "DIO1 block registration failed\n");
+ ts5500_dio2_pdev.dev.parent = &pdev->dev;
+ if (platform_device_register(&ts5500_dio2_pdev))
+ dev_warn(&pdev->dev, "DIO2 block registration failed\n");
+
+ if (led_classdev_register(&pdev->dev, &ts5500_led_cdev))
+ dev_warn(&pdev->dev, "LED registration failed\n");
+
+ if (sbc->adc) {
+ ts5500_adc_pdev.dev.parent = &pdev->dev;
+ if (platform_device_register(&ts5500_adc_pdev))
+ dev_warn(&pdev->dev, "ADC registration failed\n");
+ }
+
+ return 0;
+error:
+ platform_device_unregister(pdev);
+ return err;
+}
+device_initcall(ts5500_init);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Savoir-faire Linux Inc. <kernel@savoirfairelinux.com>");
+MODULE_DESCRIPTION("Technologic Systems TS-5500 platform driver");
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index b8b3a37c80c..0f92173a12b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1034,7 +1034,8 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
* globally purge translation cache of a virtual address or all TLB's
* @cpumask: mask of all cpu's in which the address is to be removed
* @mm: mm_struct containing virtual address range
- * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
+ * @start: start virtual address to be removed from TLB
+ * @end: end virtual address to be remove from TLB
* @cpu: the current cpu
*
* This is the entry point for initiating any UV global TLB shootdown.
@@ -1056,7 +1057,7 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
*/
const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm, unsigned long start,
- unsigned end, unsigned int cpu)
+ unsigned long end, unsigned int cpu)
{
int locals = 0;
int remotes = 0;
@@ -1113,7 +1114,10 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
record_send_statistics(stat, locals, hubs, remotes, bau_desc);
- bau_desc->payload.address = start;
+ if (!end || (end - start) <= PAGE_SIZE)
+ bau_desc->payload.address = start;
+ else
+ bau_desc->payload.address = TLB_FLUSH_ALL;
bau_desc->payload.sending_cpu = cpu;
/*
* uv_flush_send_and_wait returns 0 if all cpu's were messaged,
@@ -1463,7 +1467,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
}
if (input_arg == 0) {
- elements = sizeof(stat_description)/sizeof(*stat_description);
+ elements = ARRAY_SIZE(stat_description);
printk(KERN_DEBUG "# cpu: cpu number\n");
printk(KERN_DEBUG "Sender statistics:\n");
for (i = 0; i < elements; i++)
@@ -1504,7 +1508,7 @@ static int parse_tunables_write(struct bau_control *bcp, char *instr,
char *q;
int cnt = 0;
int val;
- int e = sizeof(tunables) / sizeof(*tunables);
+ int e = ARRAY_SIZE(tunables);
p = instr + strspn(instr, WHITESPACE);
q = p;
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 5032e0d19b8..98718f604eb 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -15,7 +15,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
- * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved.
* Copyright (c) Dimitri Sivanich
*/
#include <linux/clockchips.h>
@@ -102,9 +102,10 @@ static int uv_intr_pending(int pnode)
if (is_uv1_hub())
return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
UV1H_EVENT_OCCURRED0_RTC1_MASK;
- else
- return uv_read_global_mmr64(pnode, UV2H_EVENT_OCCURRED2) &
- UV2H_EVENT_OCCURRED2_RTC_1_MASK;
+ else if (is_uvx_hub())
+ return uv_read_global_mmr64(pnode, UVXH_EVENT_OCCURRED2) &
+ UVXH_EVENT_OCCURRED2_RTC_1_MASK;
+ return 0;
}
/* Setup interrupt and return non-zero if early expiration occurred. */
@@ -122,8 +123,8 @@ static int uv_setup_intr(int cpu, u64 expires)
uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
UV1H_EVENT_OCCURRED0_RTC1_MASK);
else
- uv_write_global_mmr64(pnode, UV2H_EVENT_OCCURRED2_ALIAS,
- UV2H_EVENT_OCCURRED2_RTC_1_MASK);
+ uv_write_global_mmr64(pnode, UVXH_EVENT_OCCURRED2_ALIAS,
+ UVXH_EVENT_OCCURRED2_RTC_1_MASK);
val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 74202c1910c..7d28c885d23 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -129,8 +129,6 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
}
}
- resume_map_numa_kva(pgd_base);
-
return 0;
}
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 460f314d13e..a0fde91c16c 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -11,6 +11,8 @@
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/suspend.h>
+
+#include <asm/init.h>
#include <asm/proto.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -39,41 +41,21 @@ pgd_t *temp_level4_pgt;
void *relocated_restore_code;
-static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+static void *alloc_pgt_page(void *context)
{
- long i, j;
-
- i = pud_index(address);
- pud = pud + i;
- for (; i < PTRS_PER_PUD; pud++, i++) {
- unsigned long paddr;
- pmd_t *pmd;
-
- paddr = address + i*PUD_SIZE;
- if (paddr >= end)
- break;
-
- pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
- if (!pmd)
- return -ENOMEM;
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
- for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
- unsigned long pe;
-
- if (paddr >= end)
- break;
- pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
- pe &= __supported_pte_mask;
- set_pmd(pmd, __pmd(pe));
- }
- }
- return 0;
+ return (void *)get_safe_page(GFP_ATOMIC);
}
static int set_up_temporary_mappings(void)
{
- unsigned long start, end, next;
- int error;
+ struct x86_mapping_info info = {
+ .alloc_pgt_page = alloc_pgt_page,
+ .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+ .kernel_mapping = true,
+ };
+ unsigned long mstart, mend;
+ int result;
+ int i;
temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
if (!temp_level4_pgt)
@@ -84,21 +66,17 @@ static int set_up_temporary_mappings(void)
init_level4_pgt[pgd_index(__START_KERNEL_map)]);
/* Set up the direct mapping from scratch */
- start = (unsigned long)pfn_to_kaddr(0);
- end = (unsigned long)pfn_to_kaddr(max_pfn);
-
- for (; start < end; start = next) {
- pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
- if (!pud)
- return -ENOMEM;
- next = start + PGDIR_SIZE;
- if (next > end)
- next = end;
- if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
- return error;
- set_pgd(temp_level4_pgt + pgd_index(start),
- mk_kernel_pgd(__pa(pud)));
+ for (i = 0; i < nr_pfn_mapped; i++) {
+ mstart = pfn_mapped[i].start << PAGE_SHIFT;
+ mend = pfn_mapped[i].end << PAGE_SHIFT;
+
+ result = kernel_ident_mapping_init(&info, temp_level4_pgt,
+ mstart, mend);
+
+ if (result)
+ return result;
}
+
return 0;
}
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index cbca565af5b..a44f457e70a 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -8,9 +8,26 @@
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;
-void __init setup_real_mode(void)
+void __init reserve_real_mode(void)
{
phys_addr_t mem;
+ unsigned char *base;
+ size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
+
+ /* Has to be under 1M so we can execute real-mode AP code. */
+ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
+ if (!mem)
+ panic("Cannot allocate trampoline\n");
+
+ base = __va(mem);
+ memblock_reserve(mem, size);
+ real_mode_header = (struct real_mode_header *) base;
+ printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
+ base, (unsigned long long)mem, size);
+}
+
+void __init setup_real_mode(void)
+{
u16 real_mode_seg;
u32 *rel;
u32 count;
@@ -25,16 +42,7 @@ void __init setup_real_mode(void)
u64 efer;
#endif
- /* Has to be in very low memory so we can execute real-mode AP code. */
- mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
- if (!mem)
- panic("Cannot allocate trampoline\n");
-
- base = __va(mem);
- memblock_reserve(mem, size);
- real_mode_header = (struct real_mode_header *) base;
- printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
- base, (unsigned long long)mem, size);
+ base = (unsigned char *)real_mode_header;
memcpy(base, real_mode_blob, size);
@@ -62,9 +70,9 @@ void __init setup_real_mode(void)
__va(real_mode_header->trampoline_header);
#ifdef CONFIG_X86_32
- trampoline_header->start = __pa(startup_32_smp);
+ trampoline_header->start = __pa_symbol(startup_32_smp);
trampoline_header->gdt_limit = __BOOT_DS + 7;
- trampoline_header->gdt_base = __pa(boot_gdt);
+ trampoline_header->gdt_base = __pa_symbol(boot_gdt);
#else
/*
* Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR
@@ -78,16 +86,18 @@ void __init setup_real_mode(void)
*trampoline_cr4_features = read_cr4();
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
- trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE;
- trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE;
+ trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
+ trampoline_pgd[511] = init_level4_pgt[511].pgd;
#endif
}
/*
- * set_real_mode_permissions() gets called very early, to guarantee the
- * availability of low memory. This is before the proper kernel page
+ * reserve_real_mode() gets called very early, to guarantee the
+ * availability of low memory. This is before the proper kernel page
* tables are set up, so we cannot set page permissions in that
- * function. Thus, we use an arch_initcall instead.
+ * function. Also trampoline code will be executed by APs so we
+ * need to mark it executable at do_pre_smp_initcalls() at least,
+ * thus run it as a early_initcall().
*/
static int __init set_real_mode_permissions(void)
{
@@ -111,5 +121,4 @@ static int __init set_real_mode_permissions(void)
return 0;
}
-
-arch_initcall(set_real_mode_permissions);
+early_initcall(set_real_mode_permissions);
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 28e3fa9056e..f2fe78ff22c 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -73,12 +73,12 @@
64 i386 getppid sys_getppid
65 i386 getpgrp sys_getpgrp
66 i386 setsid sys_setsid
-67 i386 sigaction sys_sigaction sys32_sigaction
+67 i386 sigaction sys_sigaction compat_sys_sigaction
68 i386 sgetmask sys_sgetmask
69 i386 ssetmask sys_ssetmask
70 i386 setreuid sys_setreuid16
71 i386 setregid sys_setregid16
-72 i386 sigsuspend sys_sigsuspend sys32_sigsuspend
+72 i386 sigsuspend sys_sigsuspend sys_sigsuspend
73 i386 sigpending sys_sigpending compat_sys_sigpending
74 i386 sethostname sys_sethostname
75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
@@ -116,16 +116,16 @@
107 i386 lstat sys_newlstat compat_sys_newlstat
108 i386 fstat sys_newfstat compat_sys_newfstat
109 i386 olduname sys_uname
-110 i386 iopl ptregs_iopl stub32_iopl
+110 i386 iopl sys_iopl
111 i386 vhangup sys_vhangup
112 i386 idle
-113 i386 vm86old ptregs_vm86old sys32_vm86_warning
+113 i386 vm86old sys_vm86old sys32_vm86_warning
114 i386 wait4 sys_wait4 compat_sys_wait4
115 i386 swapoff sys_swapoff
116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
117 i386 ipc sys_ipc sys32_ipc
118 i386 fsync sys_fsync
-119 i386 sigreturn ptregs_sigreturn stub32_sigreturn
+119 i386 sigreturn sys_sigreturn stub32_sigreturn
120 i386 clone sys_clone stub32_clone
121 i386 setdomainname sys_setdomainname
122 i386 uname sys_newuname
@@ -167,24 +167,24 @@
158 i386 sched_yield sys_sched_yield
159 i386 sched_get_priority_max sys_sched_get_priority_max
160 i386 sched_get_priority_min sys_sched_get_priority_min
-161 i386 sched_rr_get_interval sys_sched_rr_get_interval sys32_sched_rr_get_interval
+161 i386 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
162 i386 nanosleep sys_nanosleep compat_sys_nanosleep
163 i386 mremap sys_mremap
164 i386 setresuid sys_setresuid16
165 i386 getresuid sys_getresuid16
-166 i386 vm86 ptregs_vm86 sys32_vm86_warning
+166 i386 vm86 sys_vm86 sys32_vm86_warning
167 i386 query_module
168 i386 poll sys_poll
169 i386 nfsservctl
170 i386 setresgid sys_setresgid16
171 i386 getresgid sys_getresgid16
172 i386 prctl sys_prctl
-173 i386 rt_sigreturn ptregs_rt_sigreturn stub32_rt_sigreturn
-174 i386 rt_sigaction sys_rt_sigaction sys32_rt_sigaction
+173 i386 rt_sigreturn sys_rt_sigreturn stub32_rt_sigreturn
+174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
175 i386 rt_sigprocmask sys_rt_sigprocmask
-176 i386 rt_sigpending sys_rt_sigpending sys32_rt_sigpending
+176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
-178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo sys32_rt_sigqueueinfo
+178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
179 i386 rt_sigsuspend sys_rt_sigsuspend
180 i386 pread64 sys_pread64 sys32_pread
181 i386 pwrite64 sys_pwrite64 sys32_pwrite
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index dc97328bd90..38ae65dfd14 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -325,7 +325,7 @@
# x32-specific system call numbers start at 512 to avoid cache impact
# for native 64-bit operation.
#
-512 x32 rt_sigaction sys32_rt_sigaction
+512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn stub_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
515 x32 readv compat_sys_readv
@@ -335,9 +335,9 @@
519 x32 recvmsg compat_sys_recvmsg
520 x32 execve stub_x32_execve
521 x32 ptrace compat_sys_ptrace
-522 x32 rt_sigpending sys32_rt_sigpending
+522 x32 rt_sigpending compat_sys_rt_sigpending
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
-524 x32 rt_sigqueueinfo sys32_rt_sigqueueinfo
+524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
525 x32 sigaltstack compat_sys_sigaltstack
526 x32 timer_create compat_sys_timer_create
527 x32 mq_notify compat_sys_mq_notify
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index cc2f8c13128..872eb60e780 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -55,7 +55,7 @@ static FILE *input_file; /* Input file name */
static void usage(const char *err)
{
if (err)
- fprintf(stderr, "Error: %s\n\n", err);
+ fprintf(stderr, "%s: Error: %s\n\n", prog, err);
fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
fprintf(stderr, "\t-y 64bit mode\n");
fprintf(stderr, "\t-n 32bit mode\n");
@@ -269,7 +269,13 @@ int main(int argc, char **argv)
insns++;
}
- fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed);
+ fprintf(stdout, "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
+ prog,
+ (errors) ? "Failure" : "Success",
+ insns,
+ (input_file) ? "given" : "random",
+ errors,
+ seed);
return errors ? 1 : 0;
}
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5a1847d6193..79d67bd507f 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -814,12 +814,14 @@ int main(int argc, char **argv)
read_relocs(fp);
if (show_absolute_syms) {
print_absolute_symbols();
- return 0;
+ goto out;
}
if (show_absolute_relocs) {
print_absolute_relocs();
- return 0;
+ goto out;
}
emit_relocs(as_text, use_real_mode);
+out:
+ fclose(fp);
return 0;
}
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 53c90fd412d..14ef8d1dbc3 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,7 +13,6 @@ endmenu
config UML_X86
def_bool y
select GENERIC_FIND_FIRST_BIT
- select GENERIC_SIGALTSTACK
config 64BIT
bool "64-bit kernel" if SUBARCH = "x86"
@@ -25,6 +24,8 @@ config X86_32
select ARCH_WANT_IPC_PARSE_VERSION
select MODULES_USE_ELF_REL
select CLONE_BACKWARDS
+ select OLD_SIGSUSPEND3
+ select OLD_SIGACTION
config X86_64
def_bool 64BIT
@@ -37,9 +38,8 @@ config RWSEM_GENERIC_SPINLOCK
def_bool !RWSEM_XCHGADD_ALGORITHM
config 3_LEVEL_PGTABLES
- bool "Three-level pagetables (EXPERIMENTAL)" if !64BIT
+ bool "Three-level pagetables" if !64BIT
default 64BIT
- depends on EXPERIMENTAL
help
Three-level pagetables will let UML have more than 4G of physical
memory. All the memory that can't be mapped directly will be treated
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 5d065b2222d..eafa324eb7a 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -10,7 +10,7 @@ endif
obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
- stub_$(BITS).o stub_segv.o syscalls_$(BITS).o \
+ stub_$(BITS).o stub_segv.o \
sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
mem_$(BITS).o subarch.o os-$(OS)/
@@ -25,7 +25,7 @@ subarch-$(CONFIG_HIGHMEM) += ../mm/highmem_32.o
else
-obj-y += vdso/
+obj-y += syscalls_64.o vdso/
subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \
../lib/rwsem.o
diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c
index 8784ab30d91..84ac7f7b025 100644
--- a/arch/x86/um/fault.c
+++ b/arch/x86/um/fault.c
@@ -20,7 +20,7 @@ int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
const struct exception_table_entry *fixup;
fixup = search_exception_tables(address);
- if (fixup != 0) {
+ if (fixup) {
UPT_IP(regs) = fixup->fixup;
return 1;
}
diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h
index 8436079be91..68fd2cf526f 100644
--- a/arch/x86/um/shared/sysdep/syscalls_32.h
+++ b/arch/x86/um/shared/sysdep/syscalls_32.h
@@ -8,11 +8,6 @@
typedef long syscall_handler_t(struct pt_regs);
-/* Not declared on x86, incompatible declarations on x86_64, so these have
- * to go here rather than in sys_call_table.c
- */
-extern syscall_handler_t sys_rt_sigaction;
-
extern syscall_handler_t *sys_call_table[];
#define EXECUTE_SYSCALL(syscall, regs) \
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 71cef48ea5c..ae7319db18e 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -464,7 +464,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
return 0;
}
-long sys_sigreturn(struct pt_regs *regs)
+long sys_sigreturn(void)
{
unsigned long sp = PT_REGS_SP(&current->thread.regs);
struct sigframe __user *frame = (struct sigframe __user *)(sp - 8);
@@ -577,7 +577,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
}
#endif
-long sys_rt_sigreturn(struct pt_regs *regs)
+long sys_rt_sigreturn(void)
{
unsigned long sp = PT_REGS_SP(&current->thread.regs);
struct rt_sigframe __user *frame =
@@ -601,14 +601,3 @@ long sys_rt_sigreturn(struct pt_regs *regs)
force_sig(SIGSEGV, current);
return 0;
}
-
-#ifdef CONFIG_X86_32
-long ptregs_sigreturn(void)
-{
- return sys_sigreturn(NULL);
-}
-long ptregs_rt_sigreturn(void)
-{
- return sys_rt_sigreturn(NULL);
-}
-#endif
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index a0c3b0d1a12..531d4269e2e 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -24,10 +24,6 @@
#define old_mmap sys_old_mmap
-#define ptregs_iopl sys_iopl
-#define ptregs_vm86old sys_vm86old
-#define ptregs_vm86 sys_vm86
-
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
#include <asm/syscalls_32.h>
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
deleted file mode 100644
index e8bcea99acd..00000000000
--- a/arch/x86/um/syscalls_32.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#include <linux/syscalls.h>
-#include <sysdep/syscalls.h>
-
-long sys_sigaction(int sig, const struct old_sigaction __user *act,
- struct old_sigaction __user *oact)
-{
- struct k_sigaction new_ka, old_ka;
- int ret;
-
- if (act) {
- old_sigset_t mask;
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
- __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
- __get_user(mask, &act->sa_mask))
- return -EFAULT;
- siginitset(&new_ka.sa.sa_mask, mask);
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
- return -EFAULT;
- }
-
- return ret;
-}
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 205ad328aa5..c74436e687b 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -60,7 +60,7 @@ notrace static cycle_t vread_tsc(void)
static notrace cycle_t vread_hpet(void)
{
- return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
+ return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
}
#ifdef CONFIG_PARAVIRT_CLOCK
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 138e5667409..39928d16be3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1517,72 +1517,51 @@ asmlinkage void __init xen_start_kernel(void)
#endif
}
-#ifdef CONFIG_XEN_PVHVM
-#define HVM_SHARED_INFO_ADDR 0xFE700000UL
-static struct shared_info *xen_hvm_shared_info;
-static unsigned long xen_hvm_sip_phys;
-static int xen_major, xen_minor;
-
-static void xen_hvm_connect_shared_info(unsigned long pfn)
+void __ref xen_hvm_init_shared_info(void)
{
+ int cpu;
struct xen_add_to_physmap xatp;
+ static struct shared_info *shared_info_page = 0;
+ if (!shared_info_page)
+ shared_info_page = (struct shared_info *)
+ extend_brk(PAGE_SIZE, PAGE_SIZE);
xatp.domid = DOMID_SELF;
xatp.idx = 0;
xatp.space = XENMAPSPACE_shared_info;
- xatp.gpfn = pfn;
+ xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
BUG();
-}
-static void __init xen_hvm_set_shared_info(struct shared_info *sip)
-{
- int cpu;
-
- HYPERVISOR_shared_info = sip;
+ HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
* page, we use it in the event channel upcall and in some pvclock
* related functions. We don't need the vcpu_info placement
* optimizations because we don't use any pv_mmu or pv_irq op on
- * HVM. */
- for_each_online_cpu(cpu)
+ * HVM.
+ * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
+ * online but xen_hvm_init_shared_info is run at resume time too and
+ * in that case multiple vcpus might be online. */
+ for_each_online_cpu(cpu) {
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
-}
-
-/* Reconnect the shared_info pfn to a (new) mfn */
-void xen_hvm_resume_shared_info(void)
-{
- xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
-}
-
-/* Xen tools prior to Xen 4 do not provide a E820_Reserved area for guest usage.
- * On these old tools the shared info page will be placed in E820_Ram.
- * Xen 4 provides a E820_Reserved area at 0xFC000000, and this code expects
- * that nothing is mapped up to HVM_SHARED_INFO_ADDR.
- * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used
- * here for the shared info page. */
-static void __init xen_hvm_init_shared_info(void)
-{
- if (xen_major < 4) {
- xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
- xen_hvm_sip_phys = __pa(xen_hvm_shared_info);
- } else {
- xen_hvm_sip_phys = HVM_SHARED_INFO_ADDR;
- set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_hvm_sip_phys);
- xen_hvm_shared_info =
- (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
}
- xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
- xen_hvm_set_shared_info(xen_hvm_shared_info);
}
+#ifdef CONFIG_XEN_PVHVM
static void __init init_hvm_pv_info(void)
{
- uint32_t ecx, edx, pages, msr, base;
+ int major, minor;
+ uint32_t eax, ebx, ecx, edx, pages, msr, base;
u64 pfn;
base = xen_cpuid_base();
+ cpuid(base + 1, &eax, &ebx, &ecx, &edx);
+
+ major = eax >> 16;
+ minor = eax & 0xffff;
+ printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
+
cpuid(base + 2, &pages, &msr, &ecx, &edx);
pfn = __pa(hypercall_page);
@@ -1633,22 +1612,12 @@ static void __init xen_hvm_guest_init(void)
static bool __init xen_hvm_platform(void)
{
- uint32_t eax, ebx, ecx, edx, base;
-
if (xen_pv_domain())
return false;
- base = xen_cpuid_base();
- if (!base)
+ if (!xen_cpuid_base())
return false;
- cpuid(base + 1, &eax, &ebx, &ecx, &edx);
-
- xen_major = eax >> 16;
- xen_minor = eax & 0xffff;
-
- printk(KERN_INFO "Xen version %d.%d.\n", xen_major, xen_minor);
-
return true;
}
@@ -1668,6 +1637,7 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
.name = "Xen HVM",
.detect = xen_hvm_platform,
.init_platform = xen_hvm_guest_init,
+ .x2apic_available = xen_x2apic_para_available,
};
EXPORT_SYMBOL(x86_hyper_xen_hvm);
#endif
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 01de35c7722..e8e34938c57 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1178,20 +1178,6 @@ static void xen_exit_mmap(struct mm_struct *mm)
static void xen_post_allocator_init(void);
-static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
-{
- /* reserve the range used */
- native_pagetable_reserve(start, end);
-
- /* set as RW the rest */
- printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
- PFN_PHYS(pgt_buf_top));
- while (end < PFN_PHYS(pgt_buf_top)) {
- make_lowmem_page_readwrite(__va(end));
- end += PAGE_SIZE;
- }
-}
-
#ifdef CONFIG_X86_64
static void __init xen_cleanhighmap(unsigned long vaddr,
unsigned long vaddr_end)
@@ -1422,7 +1408,6 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
xen_mc_callback(set_current_cr3, (void *)cr3);
}
}
-
static void xen_write_cr3(unsigned long cr3)
{
BUG_ON(preemptible());
@@ -1448,6 +1433,45 @@ static void xen_write_cr3(unsigned long cr3)
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
}
+#ifdef CONFIG_X86_64
+/*
+ * At the start of the day - when Xen launches a guest, it has already
+ * built pagetables for the guest. We diligently look over them
+ * in xen_setup_kernel_pagetable and graft as appropiate them in the
+ * init_level4_pgt and its friends. Then when we are happy we load
+ * the new init_level4_pgt - and continue on.
+ *
+ * The generic code starts (start_kernel) and 'init_mem_mapping' sets
+ * up the rest of the pagetables. When it has completed it loads the cr3.
+ * N.B. that baremetal would start at 'start_kernel' (and the early
+ * #PF handler would create bootstrap pagetables) - so we are running
+ * with the same assumptions as what to do when write_cr3 is executed
+ * at this point.
+ *
+ * Since there are no user-page tables at all, we have two variants
+ * of xen_write_cr3 - the early bootup (this one), and the late one
+ * (xen_write_cr3). The reason we have to do that is that in 64-bit
+ * the Linux kernel and user-space are both in ring 3 while the
+ * hypervisor is in ring 0.
+ */
+static void __init xen_write_cr3_init(unsigned long cr3)
+{
+ BUG_ON(preemptible());
+
+ xen_mc_batch(); /* disables interrupts */
+
+ /* Update while interrupts are disabled, so its atomic with
+ respect to ipis */
+ this_cpu_write(xen_cr3, cr3);
+
+ __xen_write_cr3(true, cr3);
+
+ xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
+
+ pv_mmu_ops.write_cr3 = &xen_write_cr3;
+}
+#endif
+
static int xen_pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd = mm->pgd;
@@ -1503,19 +1527,6 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
#else /* CONFIG_X86_64 */
static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
{
- unsigned long pfn = pte_pfn(pte);
-
- /*
- * If the new pfn is within the range of the newly allocated
- * kernel pagetable, and it isn't being mapped into an
- * early_ioremap fixmap slot as a freshly allocated page, make sure
- * it is RO.
- */
- if (((!is_early_ioremap_ptep(ptep) &&
- pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
- (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
- pte = pte_wrprotect(pte);
-
return pte;
}
#endif /* CONFIG_X86_64 */
@@ -2129,11 +2140,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.write_cr2 = xen_write_cr2,
.read_cr3 = xen_read_cr3,
-#ifdef CONFIG_X86_32
.write_cr3 = xen_write_cr3_init,
-#else
- .write_cr3 = xen_write_cr3,
-#endif
.flush_tlb_user = xen_flush_tlb,
.flush_tlb_kernel = xen_flush_tlb,
@@ -2197,7 +2204,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
void __init xen_init_mmu_ops(void)
{
- x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_init = xen_pagetable_init;
pv_mmu_ops = xen_mmu_ops;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8971a26d21a..94eac5c85cd 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -556,12 +556,9 @@ void __init xen_arch_setup(void)
COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
/* Set up idle, making sure it calls safe_halt() pvop */
-#ifdef CONFIG_X86_32
- boot_cpu_data.hlt_works_ok = 1;
-#endif
disable_cpuidle();
disable_cpufreq();
- WARN_ON(set_pm_idle_to_default());
+ WARN_ON(xen_set_default_idle());
fiddle_vdso();
#ifdef CONFIG_NUMA
numa_off = 1;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 4f7d2599b48..09ea61d2e02 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -300,8 +300,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
gdt = get_cpu_gdt_table(cpu);
ctxt->flags = VGCF_IN_KERNEL;
- ctxt->user_regs.ds = __USER_DS;
- ctxt->user_regs.es = __USER_DS;
ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU;
@@ -310,35 +308,41 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
- ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
- xen_copy_trap_info(ctxt->trap_ctxt);
+ {
+ ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
+ ctxt->user_regs.ds = __USER_DS;
+ ctxt->user_regs.es = __USER_DS;
- ctxt->ldt_ents = 0;
+ xen_copy_trap_info(ctxt->trap_ctxt);
- BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+ ctxt->ldt_ents = 0;
- gdt_mfn = arbitrary_virt_to_mfn(gdt);
- make_lowmem_page_readonly(gdt);
- make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
+ BUG_ON((unsigned long)gdt & ~PAGE_MASK);
- ctxt->gdt_frames[0] = gdt_mfn;
- ctxt->gdt_ents = GDT_ENTRIES;
+ gdt_mfn = arbitrary_virt_to_mfn(gdt);
+ make_lowmem_page_readonly(gdt);
+ make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
- ctxt->user_regs.cs = __KERNEL_CS;
- ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
+ ctxt->gdt_frames[0] = gdt_mfn;
+ ctxt->gdt_ents = GDT_ENTRIES;
- ctxt->kernel_ss = __KERNEL_DS;
- ctxt->kernel_sp = idle->thread.sp0;
+ ctxt->kernel_ss = __KERNEL_DS;
+ ctxt->kernel_sp = idle->thread.sp0;
#ifdef CONFIG_X86_32
- ctxt->event_callback_cs = __KERNEL_CS;
- ctxt->failsafe_callback_cs = __KERNEL_CS;
+ ctxt->event_callback_cs = __KERNEL_CS;
+ ctxt->failsafe_callback_cs = __KERNEL_CS;
#endif
- ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
- ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
+ ctxt->event_callback_eip =
+ (unsigned long)xen_hypervisor_callback;
+ ctxt->failsafe_callback_eip =
+ (unsigned long)xen_failsafe_callback;
+ }
+ ctxt->user_regs.cs = __KERNEL_CS;
+ ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
@@ -432,13 +436,6 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
cpu_bringup();
- /*
- * Balance out the preempt calls - as we are running in cpu_idle
- * loop which has been called at bootup from cpu_bringup_and_idle.
- * The cpucpu_bringup_and_idle called cpu_bringup which made a
- * preempt_disable() So this preempt_enable will balance it out.
- */
- preempt_enable();
}
#else /* !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 83e866d714c..f7a080ef035 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -328,7 +328,6 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
if (per_cpu(lock_spinners, cpu) == xl) {
ADD_STATS(released_slow_kicked, 1);
xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
- break;
}
}
}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index ae8a00c39de..45329c8c226 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
{
#ifdef CONFIG_XEN_PVHVM
int cpu;
- xen_hvm_resume_shared_info();
+ xen_hvm_init_shared_info();
xen_callback_vector();
xen_unplug_emulated_devices();
if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index f9643fc50de..33ca6e42a4c 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -89,11 +89,11 @@ ENTRY(xen_iret)
*/
#ifdef CONFIG_SMP
GET_THREAD_INFO(%eax)
- movl TI_cpu(%eax), %eax
- movl __per_cpu_offset(,%eax,4), %eax
- mov xen_vcpu(%eax), %eax
+ movl %ss:TI_cpu(%eax), %eax
+ movl %ss:__per_cpu_offset(,%eax,4), %eax
+ mov %ss:xen_vcpu(%eax), %eax
#else
- movl xen_vcpu, %eax
+ movl %ss:xen_vcpu, %eax
#endif
/* check IF state we're restoring */
@@ -106,11 +106,11 @@ ENTRY(xen_iret)
* resuming the code, so we don't have to be worried about
* being preempted to another CPU.
*/
- setz XEN_vcpu_info_mask(%eax)
+ setz %ss:XEN_vcpu_info_mask(%eax)
xen_iret_start_crit:
/* check for unmasked and pending */
- cmpw $0x0001, XEN_vcpu_info_pending(%eax)
+ cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
/*
* If there's something pending, mask events again so we can
@@ -118,7 +118,7 @@ xen_iret_start_crit:
* touch XEN_vcpu_info_mask.
*/
jne 1f
- movb $1, XEN_vcpu_info_mask(%eax)
+ movb $1, %ss:XEN_vcpu_info_mask(%eax)
1: popl %eax
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d2e73d19d36..a95b41744ad 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -40,7 +40,7 @@ void xen_enable_syscall(void);
void xen_vcpu_restore(void);
void xen_callback_vector(void);
-void xen_hvm_resume_shared_info(void);
+void xen_hvm_init_shared_info(void);
void xen_unplug_emulated_devices(void);
void __init xen_build_dynamic_phys_to_machine(void);