summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig25
-rw-r--r--arch/x86/Kconfig.debug11
-rw-r--r--arch/x86/Makefile7
-rw-r--r--arch/x86/boot/compressed/eboot.c67
-rw-r--r--arch/x86/boot/compressed/head_64.S2
-rw-r--r--arch/x86/boot/tools/build.c1
-rw-r--r--arch/x86/configs/kvm_guest.config28
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S48
-rw-r--r--arch/x86/ia32/ia32_aout.c2
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/apic.h27
-rw-r--r--arch/x86/include/asm/cpufeature.h118
-rw-r--r--arch/x86/include/asm/desc.h117
-rw-r--r--arch/x86/include/asm/efi.h35
-rw-r--r--arch/x86/include/asm/entry_arch.h8
-rw-r--r--arch/x86/include/asm/fixmap.h2
-rw-r--r--arch/x86/include/asm/fpu-internal.h4
-rw-r--r--arch/x86/include/asm/hw_irq.h17
-rw-r--r--arch/x86/include/asm/irq.h5
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/microcode.h4
-rw-r--r--arch/x86/include/asm/microcode_amd.h78
-rw-r--r--arch/x86/include/asm/microcode_intel.h2
-rw-r--r--arch/x86/include/asm/mshyperv.h3
-rw-r--r--arch/x86/include/asm/mutex_32.h11
-rw-r--r--arch/x86/include/asm/mutex_64.h11
-rw-r--r--arch/x86/include/asm/nmi.h4
-rw-r--r--arch/x86/include/asm/perf_event.h3
-rw-r--r--arch/x86/include/asm/pgtable.h3
-rw-r--r--arch/x86/include/asm/processor.h5
-rw-r--r--arch/x86/include/asm/sighandling.h4
-rw-r--r--arch/x86/include/asm/special_insns.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h2
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h104
-rw-r--r--arch/x86/include/asm/uaccess_64.h2
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h3
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h1
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h3
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h154
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/apic/apic.c71
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c66
-rw-r--r--arch/x86/kernel/asm-offsets_32.c1
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/bugs.c21
-rw-r--r--arch/x86/kernel/cpu/common.c39
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c52
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c15
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c12
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c24
-rw-r--r--arch/x86/kernel/cpu/mcheck/threshold.c24
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c23
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event.c69
-rw-r--r--arch/x86/kernel/cpu/perf_event.h24
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c34
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_iommu.c504
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_iommu.h40
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c138
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c178
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c69
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h4
-rw-r--r--arch/x86/kernel/cpu/proc.c4
-rw-r--r--arch/x86/kernel/doublefault.c (renamed from arch/x86/kernel/doublefault_32.c)15
-rw-r--r--arch/x86/kernel/entry_32.S12
-rw-r--r--arch/x86/kernel/entry_64.S38
-rw-r--r--arch/x86/kernel/head_32.S21
-rw-r--r--arch/x86/kernel/head_64.S8
-rw-r--r--arch/x86/kernel/i387.c59
-rw-r--r--arch/x86/kernel/irq.c33
-rw-r--r--arch/x86/kernel/irq_work.c24
-rw-r--r--arch/x86/kernel/kprobes/core.c14
-rw-r--r--arch/x86/kernel/kvmclock.c1
-rw-r--r--arch/x86/kernel/microcode_amd.c133
-rw-r--r--arch/x86/kernel/microcode_amd_early.c302
-rw-r--r--arch/x86/kernel/microcode_core_early.c49
-rw-r--r--arch/x86/kernel/microcode_intel_early.c6
-rw-r--r--arch/x86/kernel/nmi.c36
-rw-r--r--arch/x86/kernel/process.c12
-rw-r--r--arch/x86/kernel/process_32.c13
-rw-r--r--arch/x86/kernel/process_64.c11
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S2
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S34
-rw-r--r--arch/x86/kernel/signal.c16
-rw-r--r--arch/x86/kernel/smp.c65
-rw-r--r--arch/x86/kernel/smpboot.c8
-rw-r--r--arch/x86/kernel/tboot.c73
-rw-r--r--arch/x86/kernel/tracepoint.c61
-rw-r--r--arch/x86/kernel/traps.c7
-rw-r--r--arch/x86/kernel/xsave.c5
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c7
-rw-r--r--arch/x86/lguest/boot.c2
-rw-r--r--arch/x86/mm/hugetlbpage.c187
-rw-r--r--arch/x86/mm/init.c6
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/ioremap.c8
-rw-r--r--arch/x86/platform/efi/efi.c200
-rw-r--r--arch/x86/tools/relocs.c4
-rw-r--r--arch/x86/vdso/vdso32-setup.c4
-rw-r--r--arch/x86/xen/enlighten.c2
-rw-r--r--arch/x86/xen/smp.c8
110 files changed, 2938 insertions, 957 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 685692c94f0..b094816a7e0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -207,6 +207,12 @@ config ARCH_HIBERNATION_POSSIBLE
config ARCH_SUSPEND_POSSIBLE
def_bool y
+config ARCH_WANT_HUGE_PMD_SHARE
+ def_bool y
+
+config ARCH_WANT_GENERAL_HUGETLB
+ def_bool y
+
config ZONE_DMA32
bool
default X86_64
@@ -336,6 +342,7 @@ config X86_EXTENDED_PLATFORM
If you enable this option then you'll be able to select support
for the following (non-PC) 32 bit x86 platforms:
+ Goldfish (Android emulator)
AMD Elan
NUMAQ (IBM/Sequent)
RDC R-321x SoC
@@ -410,6 +417,7 @@ config X86_UV
config X86_GOLDFISH
bool "Goldfish (Virtual Platform)"
depends on X86_32
+ depends on X86_EXTENDED_PLATFORM
---help---
Enable support for the Goldfish virtual platform used primarily
for Android development. Unless you are building for the Android
@@ -1058,8 +1066,16 @@ config MICROCODE_INTEL_LIB
depends on MICROCODE_INTEL
config MICROCODE_INTEL_EARLY
+ def_bool n
+
+config MICROCODE_AMD_EARLY
+ def_bool n
+
+config MICROCODE_EARLY
bool "Early load microcode"
- depends on MICROCODE_INTEL && BLK_DEV_INITRD
+ depends on MICROCODE=y && BLK_DEV_INITRD
+ select MICROCODE_INTEL_EARLY if MICROCODE_INTEL
+ select MICROCODE_AMD_EARLY if MICROCODE_AMD
default y
help
This option provides functionality to read additional microcode data
@@ -1067,10 +1083,6 @@ config MICROCODE_INTEL_EARLY
microcode to CPU's as early as possible. No functional change if no
microcode data is glued to the initrd, therefore it's safe to say Y.
-config MICROCODE_EARLY
- def_bool y
- depends on MICROCODE_INTEL_EARLY
-
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
---help---
@@ -1725,7 +1737,7 @@ config PHYSICAL_ALIGN
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
- depends on SMP && HOTPLUG
+ depends on SMP
---help---
Say Y here to allow turning CPUs off and on. CPUs can be
controlled through /sys/devices/system/cpu.
@@ -2265,6 +2277,7 @@ source "fs/Kconfig.binfmt"
config IA32_EMULATION
bool "IA32 Emulation"
depends on X86_64
+ select BINFMT_ELF
select COMPAT_BINFMT_ELF
select HAVE_UID16
---help---
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c198b7e13e7..c963881de0d 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -122,7 +122,6 @@ config DEBUG_NX_TEST
config DOUBLEFAULT
default y
bool "Enable doublefault exception handler" if EXPERT
- depends on X86_32
---help---
This option allows trapping of rare doublefault exceptions that
would otherwise cause a system to silently reboot. Disabling this
@@ -304,4 +303,14 @@ config DEBUG_NMI_SELFTEST
If unsure, say N.
+config X86_DEBUG_STATIC_CPU_HAS
+ bool "Debug alternatives"
+ depends on DEBUG_KERNEL
+ ---help---
+ This option causes additional code to be generated which
+ fails if static_cpu_has() is used before alternatives have
+ run.
+
+ If unsure, say N.
+
endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5c477260294..07639c656fc 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -220,6 +220,12 @@ archclean:
$(Q)$(MAKE) $(clean)=$(boot)
$(Q)$(MAKE) $(clean)=arch/x86/tools
+PHONY += kvmconfig
+kvmconfig:
+ $(if $(wildcard $(objtree)/.config),, $(error You need an existing .config for this target))
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config arch/x86/configs/kvm_guest.config
+ $(Q)yes "" | $(MAKE) oldconfig
+
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
echo ' install - Install kernel using'
@@ -233,4 +239,5 @@ define archhelp
echo ' bzdisk/fdimage*/isoimage also accept:'
echo ' FDARGS="..." arguments for the booted kernel'
echo ' FDINITRD=file initrd for the booted kernel'
+ echo ' kvmconfig - Enable additional options for guest kernel support'
endef
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 35ee62fccf9..d606463aa6d 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -251,51 +251,6 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size)
*size = len;
}
-static efi_status_t setup_efi_vars(struct boot_params *params)
-{
- struct setup_data *data;
- struct efi_var_bootdata *efidata;
- u64 store_size, remaining_size, var_size;
- efi_status_t status;
-
- if (sys_table->runtime->hdr.revision < EFI_2_00_SYSTEM_TABLE_REVISION)
- return EFI_UNSUPPORTED;
-
- data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
- while (data && data->next)
- data = (struct setup_data *)(unsigned long)data->next;
-
- status = efi_call_phys4((void *)sys_table->runtime->query_variable_info,
- EFI_VARIABLE_NON_VOLATILE |
- EFI_VARIABLE_BOOTSERVICE_ACCESS |
- EFI_VARIABLE_RUNTIME_ACCESS, &store_size,
- &remaining_size, &var_size);
-
- if (status != EFI_SUCCESS)
- return status;
-
- status = efi_call_phys3(sys_table->boottime->allocate_pool,
- EFI_LOADER_DATA, sizeof(*efidata), &efidata);
-
- if (status != EFI_SUCCESS)
- return status;
-
- efidata->data.type = SETUP_EFI_VARS;
- efidata->data.len = sizeof(struct efi_var_bootdata) -
- sizeof(struct setup_data);
- efidata->data.next = 0;
- efidata->store_size = store_size;
- efidata->remaining_size = remaining_size;
- efidata->max_var_size = var_size;
-
- if (data)
- data->next = (unsigned long)efidata;
- else
- params->hdr.setup_data = (unsigned long)efidata;
-
-}
-
static efi_status_t setup_efi_pci(struct boot_params *params)
{
efi_pci_io_protocol *pci;
@@ -1037,18 +992,20 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
efi_memory_desc_t *mem_map;
efi_status_t status;
__u32 desc_version;
+ bool called_exit = false;
u8 nr_entries;
int i;
size = sizeof(*mem_map) * 32;
again:
- size += sizeof(*mem_map);
+ size += sizeof(*mem_map) * 2;
_size = size;
status = low_alloc(size, 1, (unsigned long *)&mem_map);
if (status != EFI_SUCCESS)
return status;
+get_map:
status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
mem_map, &key, &desc_size, &desc_version);
if (status == EFI_BUFFER_TOO_SMALL) {
@@ -1074,8 +1031,20 @@ again:
/* Might as well exit boot services now */
status = efi_call_phys2(sys_table->boottime->exit_boot_services,
handle, key);
- if (status != EFI_SUCCESS)
- goto free_mem_map;
+ if (status != EFI_SUCCESS) {
+ /*
+ * ExitBootServices() will fail if any of the event
+ * handlers change the memory map. In which case, we
+ * must be prepared to retry, but only once so that
+ * we're guaranteed to exit on repeated failures instead
+ * of spinning forever.
+ */
+ if (called_exit)
+ goto free_mem_map;
+
+ called_exit = true;
+ goto get_map;
+ }
/* Historic? */
boot_params->alt_mem_k = 32 * 1024;
@@ -1202,8 +1171,6 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
setup_graphics(boot_params);
- setup_efi_vars(boot_params);
-
setup_efi_pci(boot_params);
status = efi_call_phys3(sys_table->boottime->allocate_pool,
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 16f24e6dad7..06e71c2c16b 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -27,8 +27,6 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/pgtable_types.h>
-#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 94c54465002..c941d6a8887 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -243,6 +243,7 @@ static void parse_zoffset(char *fname)
c = fread(buf, 1, sizeof(buf) - 1, file);
if (ferror(file))
die("read-error on `zoffset.h'");
+ fclose(file);
buf[c] = 0;
p = (char *)buf;
diff --git a/arch/x86/configs/kvm_guest.config b/arch/x86/configs/kvm_guest.config
new file mode 100644
index 00000000000..f9affcc3b9f
--- /dev/null
+++ b/arch/x86/configs/kvm_guest.config
@@ -0,0 +1,28 @@
+CONFIG_NET=y
+CONFIG_NET_CORE=y
+CONFIG_NETDEVICES=y
+CONFIG_BLOCK=y
+CONFIG_BLK_DEV=y
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_INET=y
+CONFIG_TTY=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_BINFMT_ELF=y
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_KVM_GUEST=y
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_NET=y
+CONFIG_9P_FS=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 62fe22cd4cb..477e9d75149 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2681,56 +2681,68 @@ ENTRY(aesni_xts_crypt8)
addq %rcx, KEYP
movdqa IV, STATE1
- pxor 0x00(INP), STATE1
+ movdqu 0x00(INP), INC
+ pxor INC, STATE1
movdqu IV, 0x00(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE2
- pxor 0x10(INP), STATE2
+ movdqu 0x10(INP), INC
+ pxor INC, STATE2
movdqu IV, 0x10(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE3
- pxor 0x20(INP), STATE3
+ movdqu 0x20(INP), INC
+ pxor INC, STATE3
movdqu IV, 0x20(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE4
- pxor 0x30(INP), STATE4
+ movdqu 0x30(INP), INC
+ pxor INC, STATE4
movdqu IV, 0x30(OUTP)
call *%r11
- pxor 0x00(OUTP), STATE1
+ movdqu 0x00(OUTP), INC
+ pxor INC, STATE1
movdqu STATE1, 0x00(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE1
- pxor 0x40(INP), STATE1
+ movdqu 0x40(INP), INC
+ pxor INC, STATE1
movdqu IV, 0x40(OUTP)
- pxor 0x10(OUTP), STATE2
+ movdqu 0x10(OUTP), INC
+ pxor INC, STATE2
movdqu STATE2, 0x10(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE2
- pxor 0x50(INP), STATE2
+ movdqu 0x50(INP), INC
+ pxor INC, STATE2
movdqu IV, 0x50(OUTP)
- pxor 0x20(OUTP), STATE3
+ movdqu 0x20(OUTP), INC
+ pxor INC, STATE3
movdqu STATE3, 0x20(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE3
- pxor 0x60(INP), STATE3
+ movdqu 0x60(INP), INC
+ pxor INC, STATE3
movdqu IV, 0x60(OUTP)
- pxor 0x30(OUTP), STATE4
+ movdqu 0x30(OUTP), INC
+ pxor INC, STATE4
movdqu STATE4, 0x30(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE4
- pxor 0x70(INP), STATE4
+ movdqu 0x70(INP), INC
+ pxor INC, STATE4
movdqu IV, 0x70(OUTP)
_aesni_gf128mul_x_ble()
@@ -2738,16 +2750,20 @@ ENTRY(aesni_xts_crypt8)
call *%r11
- pxor 0x40(OUTP), STATE1
+ movdqu 0x40(OUTP), INC
+ pxor INC, STATE1
movdqu STATE1, 0x40(OUTP)
- pxor 0x50(OUTP), STATE2
+ movdqu 0x50(OUTP), INC
+ pxor INC, STATE2
movdqu STATE2, 0x50(OUTP)
- pxor 0x60(OUTP), STATE3
+ movdqu 0x60(OUTP), INC
+ pxor INC, STATE3
movdqu STATE3, 0x60(OUTP)
- pxor 0x70(OUTP), STATE4
+ movdqu 0x70(OUTP), INC
+ pxor INC, STATE4
movdqu STATE4, 0x70(OUTP)
ret
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 805078e0801..52ff81cce00 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -192,7 +192,7 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
/* struct user */
DUMP_WRITE(&dump, sizeof(dump));
/* Now dump all of the user data. Include malloced stuff as well */
- DUMP_SEEK(PAGE_SIZE);
+ DUMP_SEEK(PAGE_SIZE - sizeof(dump));
/* now we start writing out the user space info */
set_fs(USER_DS);
/* Dump the data area */
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index cf1a471a18a..bccfca68430 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -34,8 +34,6 @@
#include <asm/sys_ia32.h>
#include <asm/smap.h>
-#define FIX_EFLAGS __FIX_EFLAGS
-
int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
{
int err = 0;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 33880342223..f8119b582c3 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -12,6 +12,7 @@
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
+#include <asm/idle.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -687,5 +688,31 @@ extern int default_check_phys_apicid_present(int phys_apicid);
#endif
#endif /* CONFIG_X86_LOCAL_APIC */
+extern void irq_enter(void);
+extern void irq_exit(void);
+
+static inline void entering_irq(void)
+{
+ irq_enter();
+ exit_idle();
+}
+
+static inline void entering_ack_irq(void)
+{
+ ack_APIC_irq();
+ entering_irq();
+}
+
+static inline void exiting_irq(void)
+{
+ irq_exit();
+}
+
+static inline void exiting_ack_irq(void)
+{
+ irq_exit();
+ /* Ack only at the end to avoid potential reentry */
+ ack_APIC_irq();
+}
#endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e99ac27f95b..47538a61c91 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -92,7 +92,7 @@
#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */
#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */
#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
- /* 21 available, was AMD_C1E */
+#define X86_FEATURE_ALWAYS (3*32+21) /* "" Always-present feature */
#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */
#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */
@@ -356,15 +356,36 @@ extern const char * const x86_power_flags[32];
#endif /* CONFIG_X86_64 */
#if __GNUC__ >= 4
+extern void warn_pre_alternatives(void);
+extern bool __static_cpu_has_safe(u16 bit);
+
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
* These are only valid after alternatives have run, but will statically
* patch the target code for additional performance.
- *
*/
static __always_inline __pure bool __static_cpu_has(u16 bit)
{
#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
+
+#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
+ /*
+ * Catch too early usage of this before alternatives
+ * have run.
+ */
+ asm goto("1: jmp %l[t_warn]\n"
+ "2:\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n"
+ " .long 0\n" /* no replacement */
+ " .word %P0\n" /* 1: do replace */
+ " .byte 2b - 1b\n" /* source len */
+ " .byte 0\n" /* replacement len */
+ ".previous\n"
+ /* skipping size check since replacement size = 0 */
+ : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
+#endif
+
asm goto("1: jmp %l[t_no]\n"
"2:\n"
".section .altinstructions,\"a\"\n"
@@ -379,7 +400,13 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
return true;
t_no:
return false;
-#else
+
+#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
+ t_warn:
+ warn_pre_alternatives();
+ return false;
+#endif
+#else /* GCC_VERSION >= 40500 */
u8 flag;
/* Open-coded due to __stringify() in ALTERNATIVE() */
asm volatile("1: movb $0,%0\n"
@@ -411,11 +438,94 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
__static_cpu_has(bit) : \
boot_cpu_has(bit) \
)
+
+static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
+{
+#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
+/*
+ * We need to spell the jumps to the compiler because, depending on the offset,
+ * the replacement jump can be bigger than the original jump, and this we cannot
+ * have. Thus, we force the jump to the widest, 4-byte, signed relative
+ * offset even though the last would often fit in less bytes.
+ */
+ asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+ "2:\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 3f - .\n" /* repl offset */
+ " .word %P1\n" /* always replace */
+ " .byte 2b - 1b\n" /* src len */
+ " .byte 4f - 3f\n" /* repl len */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+ "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
+ "4:\n"
+ ".previous\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 0\n" /* no replacement */
+ " .word %P0\n" /* feature bit */
+ " .byte 2b - 1b\n" /* src len */
+ " .byte 0\n" /* repl len */
+ ".previous\n"
+ : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
+ : : t_dynamic, t_no);
+ return true;
+ t_no:
+ return false;
+ t_dynamic:
+ return __static_cpu_has_safe(bit);
+#else /* GCC_VERSION >= 40500 */
+ u8 flag;
+ /* Open-coded due to __stringify() in ALTERNATIVE() */
+ asm volatile("1: movb $2,%0\n"
+ "2:\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 3f - .\n" /* repl offset */
+ " .word %P2\n" /* always replace */
+ " .byte 2b - 1b\n" /* source len */
+ " .byte 4f - 3f\n" /* replacement len */
+ ".previous\n"
+ ".section .discard,\"aw\",@progbits\n"
+ " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+ "3: movb $0,%0\n"
+ "4:\n"
+ ".previous\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 5f - .\n" /* repl offset */
+ " .word %P1\n" /* feature bit */
+ " .byte 4b - 3b\n" /* src len */
+ " .byte 6f - 5f\n" /* repl len */
+ ".previous\n"
+ ".section .discard,\"aw\",@progbits\n"
+ " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+ "5: movb $1,%0\n"
+ "6:\n"
+ ".previous\n"
+ : "=qm" (flag)
+ : "i" (bit), "i" (X86_FEATURE_ALWAYS));
+ return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
+#endif
+}
+
+#define static_cpu_has_safe(bit) \
+( \
+ __builtin_constant_p(boot_cpu_has(bit)) ? \
+ boot_cpu_has(bit) : \
+ _static_cpu_has_safe(bit) \
+)
#else
/*
* gcc 3.x is too stupid to do the static test; fall back to dynamic.
*/
-#define static_cpu_has(bit) boot_cpu_has(bit)
+#define static_cpu_has(bit) boot_cpu_has(bit)
+#define static_cpu_has_safe(bit) boot_cpu_has(bit)
#endif
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 8bf1c06070d..b90e5dfeee4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -36,8 +36,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
-extern struct desc_ptr nmi_idt_descr;
-extern gate_desc nmi_idt_table[];
+extern struct desc_ptr debug_idt_descr;
+extern gate_desc debug_idt_table[];
struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES];
@@ -316,7 +316,20 @@ static inline void set_nmi_gate(int gate, void *addr)
gate_desc s;
pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
- write_idt_entry(nmi_idt_table, gate, &s);
+ write_idt_entry(debug_idt_table, gate, &s);
+}
+#endif
+
+#ifdef CONFIG_TRACING
+extern struct desc_ptr trace_idt_descr;
+extern gate_desc trace_idt_table[];
+static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
+{
+ write_idt_entry(trace_idt_table, entry, gate);
+}
+#else
+static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
+{
}
#endif
@@ -331,6 +344,7 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
* setup time
*/
write_idt_entry(idt_table, gate, &s);
+ write_trace_idt_entry(gate, &s);
}
/*
@@ -360,12 +374,39 @@ static inline void alloc_system_vector(int vector)
}
}
-static inline void alloc_intr_gate(unsigned int n, void *addr)
+#ifdef CONFIG_TRACING
+static inline void trace_set_intr_gate(unsigned int gate, void *addr)
+{
+ gate_desc s;
+
+ pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
+ write_idt_entry(trace_idt_table, gate, &s);
+}
+
+static inline void __trace_alloc_intr_gate(unsigned int n, void *addr)
+{
+ trace_set_intr_gate(n, addr);
+}
+#else
+static inline void trace_set_intr_gate(unsigned int gate, void *addr)
+{
+}
+
+#define __trace_alloc_intr_gate(n, addr)
+#endif
+
+static inline void __alloc_intr_gate(unsigned int n, void *addr)
{
- alloc_system_vector(n);
set_intr_gate(n, addr);
}
+#define alloc_intr_gate(n, addr) \
+ do { \
+ alloc_system_vector(n); \
+ __alloc_intr_gate(n, addr); \
+ __trace_alloc_intr_gate(n, trace_##addr); \
+ } while (0)
+
/*
* This routine sets up an interrupt gate at directory privilege level 3.
*/
@@ -405,4 +446,70 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
}
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU(u32, debug_idt_ctr);
+static inline bool is_debug_idt_enabled(void)
+{
+ if (this_cpu_read(debug_idt_ctr))
+ return true;
+
+ return false;
+}
+
+static inline void load_debug_idt(void)
+{
+ load_idt((const struct desc_ptr *)&debug_idt_descr);
+}
+#else
+static inline bool is_debug_idt_enabled(void)
+{
+ return false;
+}
+
+static inline void load_debug_idt(void)
+{
+}
+#endif
+
+#ifdef CONFIG_TRACING
+extern atomic_t trace_idt_ctr;
+static inline bool is_trace_idt_enabled(void)
+{
+ if (atomic_read(&trace_idt_ctr))
+ return true;
+
+ return false;
+}
+
+static inline void load_trace_idt(void)
+{
+ load_idt((const struct desc_ptr *)&trace_idt_descr);
+}
+#else
+static inline bool is_trace_idt_enabled(void)
+{
+ return false;
+}
+
+static inline void load_trace_idt(void)
+{
+}
+#endif
+
+/*
+ * The load_current_idt() must be called with interrupts disabled
+ * to avoid races. That way the IDT will always be set back to the expected
+ * descriptor. It's also called when a CPU is being initialized, and
+ * that doesn't need to disable interrupts, as nothing should be
+ * bothering the CPU then.
+ */
+static inline void load_current_idt(void)
+{
+ if (is_debug_idt_enabled())
+ load_debug_idt();
+ else if (is_trace_idt_enabled())
+ load_trace_idt();
+ else
+ load_idt((const struct desc_ptr *)&idt_descr);
+}
#endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 2fb5d5884e2..0062a012504 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -52,40 +52,40 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
u64 arg4, u64 arg5, u64 arg6);
#define efi_call_phys0(f) \
- efi_call0((void *)(f))
+ efi_call0((f))
#define efi_call_phys1(f, a1) \
- efi_call1((void *)(f), (u64)(a1))
+ efi_call1((f), (u64)(a1))
#define efi_call_phys2(f, a1, a2) \
- efi_call2((void *)(f), (u64)(a1), (u64)(a2))
+ efi_call2((f), (u64)(a1), (u64)(a2))
#define efi_call_phys3(f, a1, a2, a3) \
- efi_call3((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3))
+ efi_call3((f), (u64)(a1), (u64)(a2), (u64)(a3))
#define efi_call_phys4(f, a1, a2, a3, a4) \
- efi_call4((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \
+ efi_call4((f), (u64)(a1), (u64)(a2), (u64)(a3), \
(u64)(a4))
#define efi_call_phys5(f, a1, a2, a3, a4, a5) \
- efi_call5((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \
+ efi_call5((f), (u64)(a1), (u64)(a2), (u64)(a3), \
(u64)(a4), (u64)(a5))
#define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \
- efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \
+ efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \
(u64)(a4), (u64)(a5), (u64)(a6))
#define efi_call_virt0(f) \
- efi_call0((void *)(efi.systab->runtime->f))
+ efi_call0((efi.systab->runtime->f))
#define efi_call_virt1(f, a1) \
- efi_call1((void *)(efi.systab->runtime->f), (u64)(a1))
+ efi_call1((efi.systab->runtime->f), (u64)(a1))
#define efi_call_virt2(f, a1, a2) \
- efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
+ efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2))
#define efi_call_virt3(f, a1, a2, a3) \
- efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+ efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
(u64)(a3))
#define efi_call_virt4(f, a1, a2, a3, a4) \
- efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+ efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
(u64)(a3), (u64)(a4))
#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
- efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+ efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
(u64)(a3), (u64)(a4), (u64)(a5))
#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
- efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+ efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
(u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
@@ -102,13 +102,6 @@ extern void efi_call_phys_epilog(void);
extern void efi_unmap_memmap(void);
extern void efi_memory_uc(u64 addr, unsigned long size);
-struct efi_var_bootdata {
- struct setup_data data;
- u64 store_size;
- u64 remaining_size;
- u64 max_var_size;
-};
-
#ifdef CONFIG_EFI
static inline bool efi_is_native(void)
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 9bd4ecac72b..dc5fa661465 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -13,14 +13,16 @@
BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
-BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
-BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
+BUILD_INTERRUPT3(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR,
+ smp_irq_move_cleanup_interrupt)
+BUILD_INTERRUPT3(reboot_interrupt, REBOOT_VECTOR, smp_reboot_interrupt)
#endif
BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
#ifdef CONFIG_HAVE_KVM
-BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
+BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
+ smp_kvm_posted_intr_ipi)
#endif
/*
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 0dc7d9e21c3..e846225265e 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -81,11 +81,11 @@ enum fixed_addresses {
+ ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
VVAR_PAGE,
VSYSCALL_HPET,
-#endif
#ifdef CONFIG_PARAVIRT_CLOCK
PVCLOCK_FIXMAP_BEGIN,
PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
#endif
+#endif
FIX_DBGP_BASE,
FIX_EARLYCON_MEM_BASE,
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index e25cc33ec54..4d0bda7b11e 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -62,10 +62,8 @@ extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
#define xstateregs_active fpregs_active
#ifdef CONFIG_MATH_EMULATION
-# define HAVE_HWFP (boot_cpu_data.hard_math)
extern void finit_soft_fpu(struct i387_soft_struct *soft);
#else
-# define HAVE_HWFP 1
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif
@@ -345,7 +343,7 @@ static inline void __thread_fpu_end(struct task_struct *tsk)
static inline void __thread_fpu_begin(struct task_struct *tsk)
{
- if (!use_eager_fpu())
+ if (!static_cpu_has_safe(X86_FEATURE_EAGER_FPU))
clts();
__thread_set_has_fpu(tsk);
}
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 1da97efad08..e4ac559c4a2 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -77,6 +77,23 @@ extern void threshold_interrupt(void);
extern void call_function_interrupt(void);
extern void call_function_single_interrupt(void);
+#ifdef CONFIG_TRACING
+/* Interrupt handlers registered during init_IRQ */
+extern void trace_apic_timer_interrupt(void);
+extern void trace_x86_platform_ipi(void);
+extern void trace_error_interrupt(void);
+extern void trace_irq_work_interrupt(void);
+extern void trace_spurious_interrupt(void);
+extern void trace_thermal_interrupt(void);
+extern void trace_reschedule_interrupt(void);
+extern void trace_threshold_interrupt(void);
+extern void trace_call_function_interrupt(void);
+extern void trace_call_function_single_interrupt(void);
+#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
+#define trace_reboot_interrupt reboot_interrupt
+#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
+#endif /* CONFIG_TRACING */
+
/* IOAPIC */
#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
extern unsigned long io_apic_irqs;
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index ba870bb6dd8..57873beb329 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -41,4 +41,9 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
extern void init_ISA_irqs(void);
+#ifdef CONFIG_X86_LOCAL_APIC
+void arch_trigger_all_cpu_backtrace(void);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+#endif
+
#endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3741c653767..af9c5525434 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -59,7 +59,7 @@
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
- | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \
+ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index fa5f71e021d..6b52980c29c 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -61,7 +61,7 @@
#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
#define MCJ_EXCEPTION 0x8 /* raise as exception */
-#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
+#define MCJ_IRQ_BROADCAST 0x10 /* do IRQ broadcasting */
#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 6825e2efd1b..6bc3985ee47 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -60,11 +60,11 @@ static inline void __exit exit_amd_microcode(void) {}
#ifdef CONFIG_MICROCODE_EARLY
#define MAX_UCODE_COUNT 128
extern void __init load_ucode_bsp(void);
-extern __init void load_ucode_ap(void);
+extern void __cpuinit load_ucode_ap(void);
extern int __init save_microcode_in_initrd(void);
#else
static inline void __init load_ucode_bsp(void) {}
-static inline __init void load_ucode_ap(void) {}
+static inline void __cpuinit load_ucode_ap(void) {}
static inline int __init save_microcode_in_initrd(void)
{
return 0;
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
new file mode 100644
index 00000000000..c6b043f4027
--- /dev/null
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -0,0 +1,78 @@
+#ifndef _ASM_X86_MICROCODE_AMD_H
+#define _ASM_X86_MICROCODE_AMD_H
+
+#include <asm/microcode.h>
+
+#define UCODE_MAGIC 0x00414d44
+#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
+#define UCODE_UCODE_TYPE 0x00000001
+
+#define SECTION_HDR_SIZE 8
+#define CONTAINER_HDR_SZ 12
+
+struct equiv_cpu_entry {
+ u32 installed_cpu;
+ u32 fixed_errata_mask;
+ u32 fixed_errata_compare;
+ u16 equiv_cpu;
+ u16 res;
+} __attribute__((packed));
+
+struct microcode_header_amd {
+ u32 data_code;
+ u32 patch_id;
+ u16 mc_patch_data_id;
+ u8 mc_patch_data_len;
+ u8 init_flag;
+ u32 mc_patch_data_checksum;
+ u32 nb_dev_id;
+ u32 sb_dev_id;
+ u16 processor_rev_id;
+ u8 nb_rev_id;
+ u8 sb_rev_id;
+ u8 bios_api_rev;
+ u8 reserved1[3];
+ u32 match_reg[8];
+} __attribute__((packed));
+
+struct microcode_amd {
+ struct microcode_header_amd hdr;
+ unsigned int mpb[0];
+};
+
+static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
+ unsigned int sig)
+{
+ int i = 0;
+
+ if (!equiv_cpu_table)
+ return 0;
+
+ while (equiv_cpu_table[i].installed_cpu != 0) {
+ if (sig == equiv_cpu_table[i].installed_cpu)
+ return equiv_cpu_table[i].equiv_cpu;
+
+ i++;
+ }
+ return 0;
+}
+
+extern int __apply_microcode_amd(struct microcode_amd *mc_amd);
+extern int apply_microcode_amd(int cpu);
+extern enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size);
+
+#ifdef CONFIG_MICROCODE_AMD_EARLY
+#ifdef CONFIG_X86_32
+#define MPB_MAX_SIZE PAGE_SIZE
+extern u8 amd_bsp_mpb[MPB_MAX_SIZE];
+#endif
+extern void __init load_ucode_amd_bsp(void);
+extern void __cpuinit load_ucode_amd_ap(void);
+extern int __init save_microcode_in_initrd_amd(void);
+#else
+static inline void __init load_ucode_amd_bsp(void) {}
+static inline void __cpuinit load_ucode_amd_ap(void) {}
+static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; }
+#endif
+
+#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 5356f927d41..87a085333cb 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -67,10 +67,12 @@ update_match_revision(struct microcode_header_intel *mc_header, int rev);
extern void __init load_ucode_intel_bsp(void);
extern void __cpuinit load_ucode_intel_ap(void);
extern void show_ucode_info_early(void);
+extern int __init save_microcode_in_initrd_intel(void);
#else
static inline __init void load_ucode_intel_bsp(void) {}
static inline __cpuinit void load_ucode_intel_ap(void) {}
static inline void show_ucode_info_early(void) {}
+static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; }
#endif
#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index c2934be2446..cd9c41938b8 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -12,6 +12,9 @@ struct ms_hyperv_info {
extern struct ms_hyperv_info ms_hyperv;
void hyperv_callback_vector(void);
+#ifdef CONFIG_TRACING
+#define trace_hyperv_callback_vector hyperv_callback_vector
+#endif
void hyperv_vector_handler(struct pt_regs *regs);
void hv_register_vmbus_handler(int irq, irq_handler_t handler);
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 03f90c8a5a7..0208c3c2cbc 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -42,17 +42,14 @@ do { \
* __mutex_fastpath_lock_retval - try to take the lock by moving the count
* from 1 to a 0 value
* @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
*
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if it
- * wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
*/
-static inline int __mutex_fastpath_lock_retval(atomic_t *count,
- int (*fail_fn)(atomic_t *))
+static inline int __mutex_fastpath_lock_retval(atomic_t *count)
{
if (unlikely(atomic_dec_return(count) < 0))
- return fail_fn(count);
+ return -1;
else
return 0;
}
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 68a87b0f8e2..2c543fff241 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -37,17 +37,14 @@ do { \
* __mutex_fastpath_lock_retval - try to take the lock by moving the count
* from 1 to a 0 value
* @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
*
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
*/
-static inline int __mutex_fastpath_lock_retval(atomic_t *count,
- int (*fail_fn)(atomic_t *))
+static inline int __mutex_fastpath_lock_retval(atomic_t *count)
{
if (unlikely(atomic_dec_return(count) < 0))
- return fail_fn(count);
+ return -1;
else
return 0;
}
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c0fa356e90d..86f9301903c 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -18,9 +18,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
extern int unknown_nmi_panic;
-void arch_trigger_all_cpu_backtrace(void);
-#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
-#endif
+#endif /* CONFIG_X86_LOCAL_APIC */
#define NMI_FLAG_FIRST 1
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 57cb6340221..8249df45d2f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,6 +29,9 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
+#define HSW_IN_TX (1ULL << 32)
+#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
+
#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1e672234c4f..5b0818bc896 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -506,9 +506,6 @@ static inline unsigned long pages_to_mb(unsigned long npg)
return npg >> (20 - PAGE_SHIFT);
}
-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
- remap_pfn_range(vma, vaddr, pfn, size, prot)
-
#if PAGETABLE_LEVELS > 2
static inline int pud_none(pud_t pud)
{
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 22224b3b43b..29937c4f6ff 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -89,9 +89,9 @@ struct cpuinfo_x86 {
char wp_works_ok; /* It doesn't on 386's */
/* Problems on some 486Dx4's and old 386's: */
- char hard_math;
char rfu;
char pad0;
+ char pad1;
#else
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
int x86_tlbsize;
@@ -164,6 +164,7 @@ extern const struct seq_operations cpuinfo_op;
#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
extern void cpu_detect(struct cpuinfo_x86 *c);
+extern void __cpuinit fpu_detect(struct cpuinfo_x86 *c);
extern void early_cpu_init(void);
extern void identify_boot_cpu(void);
@@ -981,5 +982,5 @@ bool xen_set_default_idle(void);
#endif
void stop_this_cpu(void *dummy);
-
+void df_debug(struct pt_regs *regs, long error_code);
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index beff97f7df3..7a958164088 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -7,10 +7,10 @@
#include <asm/processor-flags.h>
-#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
+#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
- X86_EFLAGS_CF)
+ X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 41fc93a2e22..2f4d924fe6c 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -16,7 +16,7 @@ static inline void native_clts(void)
* all loads stores around it, which can hurt performance. Solution is to
* use a variable and mimic reads and writes to it to enforce serialization
*/
-static unsigned long __force_order;
+extern unsigned long __force_order;
static inline unsigned long native_read_cr0(void)
{
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 50a7fc0f824..cf512003e66 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void)
static inline void __flush_tlb_one(unsigned long addr)
{
- __flush_tlb_single(addr);
+ __flush_tlb_single(addr);
}
#define TLB_FLUSH_ALL -1UL
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
new file mode 100644
index 00000000000..2874df24e7a
--- /dev/null
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -0,0 +1,104 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq_vectors
+
+#if !defined(_TRACE_IRQ_VECTORS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IRQ_VECTORS_H
+
+#include <linux/tracepoint.h>
+
+extern void trace_irq_vector_regfunc(void);
+extern void trace_irq_vector_unregfunc(void);
+
+DECLARE_EVENT_CLASS(x86_irq_vector,
+
+ TP_PROTO(int vector),
+
+ TP_ARGS(vector),
+
+ TP_STRUCT__entry(
+ __field( int, vector )
+ ),
+
+ TP_fast_assign(
+ __entry->vector = vector;
+ ),
+
+ TP_printk("vector=%d", __entry->vector) );
+
+#define DEFINE_IRQ_VECTOR_EVENT(name) \
+DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \
+ TP_PROTO(int vector), \
+ TP_ARGS(vector), \
+ trace_irq_vector_regfunc, \
+ trace_irq_vector_unregfunc); \
+DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \
+ TP_PROTO(int vector), \
+ TP_ARGS(vector), \
+ trace_irq_vector_regfunc, \
+ trace_irq_vector_unregfunc);
+
+
+/*
+ * local_timer - called when entering/exiting a local timer interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(local_timer);
+
+/*
+ * reschedule - called when entering/exiting a reschedule vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(reschedule);
+
+/*
+ * spurious_apic - called when entering/exiting a spurious apic vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(spurious_apic);
+
+/*
+ * error_apic - called when entering/exiting an error apic vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(error_apic);
+
+/*
+ * x86_platform_ipi - called when entering/exiting a x86 platform ipi interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);
+
+/*
+ * irq_work - called when entering/exiting a irq work interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(irq_work);
+
+/*
+ * call_function - called when entering/exiting a call function interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(call_function);
+
+/*
+ * call_function_single - called when entering/exiting a call function
+ * single interrupt vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(call_function_single);
+
+/*
+ * threshold_apic - called when entering/exiting a threshold apic interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(threshold_apic);
+
+/*
+ * thermal_apic - called when entering/exiting a thermal apic interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(thermal_apic);
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE irq_vectors
+#endif /* _TRACE_IRQ_VECTORS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 142810c457d..4f7923dd000 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -235,7 +235,7 @@ extern long __copy_user_nocache(void *dst, const void __user *src,
static inline int
__copy_from_user_nocache(void *dst, const void __user *src, unsigned size)
{
- might_sleep();
+ might_fault();
return __copy_user_nocache(dst, src, size, 1);
}
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index a06983cdc12..0b46ef261c7 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -731,6 +731,9 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
}
extern void uv_bau_message_intr1(void);
+#ifdef CONFIG_TRACING
+#define trace_uv_bau_message_intr1 uv_bau_message_intr1
+#endif
extern void uv_bau_timeout_intr1(void);
struct atomic_short {
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 08744242b8d..c15ddaf9071 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -6,7 +6,6 @@
#define SETUP_E820_EXT 1
#define SETUP_DTB 2
#define SETUP_PCI 3
-#define SETUP_EFI_VARS 4
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 2af848dfa75..bb0465090ae 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -170,6 +170,9 @@
#define MSR_KNC_EVNTSEL0 0x00000028
#define MSR_KNC_EVNTSEL1 0x00000029
+/* Alternative perfctr range with full access. */
+#define MSR_IA32_PMC0 0x000004c1
+
/* AMD64 MSRs. Not complete. See the architecture manual for a more
complete list. */
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 54991a74604..180a0c3c224 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -2,75 +2,129 @@
#define _UAPI_ASM_X86_PROCESSOR_FLAGS_H
/* Various flags defined: can be included from assembler. */
+#include <linux/const.h>
+
/*
* EFLAGS bits
*/
-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
-#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */
-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
-#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+#define X86_EFLAGS_CF_BIT 0 /* Carry Flag */
+#define X86_EFLAGS_CF _BITUL(X86_EFLAGS_CF_BIT)
+#define X86_EFLAGS_FIXED_BIT 1 /* Bit 1 - always on */
+#define X86_EFLAGS_FIXED _BITUL(X86_EFLAGS_FIXED_BIT)
+#define X86_EFLAGS_PF_BIT 2 /* Parity Flag */
+#define X86_EFLAGS_PF _BITUL(X86_EFLAGS_PF_BIT)
+#define X86_EFLAGS_AF_BIT 4 /* Auxiliary carry Flag */
+#define X86_EFLAGS_AF _BITUL(X86_EFLAGS_AF_BIT)
+#define X86_EFLAGS_ZF_BIT 6 /* Zero Flag */
+#define X86_EFLAGS_ZF _BITUL(X86_EFLAGS_ZF_BIT)
+#define X86_EFLAGS_SF_BIT 7 /* Sign Flag */
+#define X86_EFLAGS_SF _BITUL(X86_EFLAGS_SF_BIT)
+#define X86_EFLAGS_TF_BIT 8 /* Trap Flag */
+#define X86_EFLAGS_TF _BITUL(X86_EFLAGS_TF_BIT)
+#define X86_EFLAGS_IF_BIT 9 /* Interrupt Flag */
+#define X86_EFLAGS_IF _BITUL(X86_EFLAGS_IF_BIT)
+#define X86_EFLAGS_DF_BIT 10 /* Direction Flag */
+#define X86_EFLAGS_DF _BITUL(X86_EFLAGS_DF_BIT)
+#define X86_EFLAGS_OF_BIT 11 /* Overflow Flag */
+#define X86_EFLAGS_OF _BITUL(X86_EFLAGS_OF_BIT)
+#define X86_EFLAGS_IOPL_BIT 12 /* I/O Privilege Level (2 bits) */
+#define X86_EFLAGS_IOPL (_AC(3,UL) << X86_EFLAGS_IOPL_BIT)
+#define X86_EFLAGS_NT_BIT 14 /* Nested Task */
+#define X86_EFLAGS_NT _BITUL(X86_EFLAGS_NT_BIT)
+#define X86_EFLAGS_RF_BIT 16 /* Resume Flag */
+#define X86_EFLAGS_RF _BITUL(X86_EFLAGS_RF_BIT)
+#define X86_EFLAGS_VM_BIT 17 /* Virtual Mode */
+#define X86_EFLAGS_VM _BITUL(X86_EFLAGS_VM_BIT)
+#define X86_EFLAGS_AC_BIT 18 /* Alignment Check/Access Control */
+#define X86_EFLAGS_AC _BITUL(X86_EFLAGS_AC_BIT)
+#define X86_EFLAGS_AC_BIT 18 /* Alignment Check/Access Control */
+#define X86_EFLAGS_AC _BITUL(X86_EFLAGS_AC_BIT)
+#define X86_EFLAGS_VIF_BIT 19 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIF _BITUL(X86_EFLAGS_VIF_BIT)
+#define X86_EFLAGS_VIP_BIT 20 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_VIP _BITUL(X86_EFLAGS_VIP_BIT)
+#define X86_EFLAGS_ID_BIT 21 /* CPUID detection */
+#define X86_EFLAGS_ID _BITUL(X86_EFLAGS_ID_BIT)
/*
* Basic CPU control in CR0
*/
-#define X86_CR0_PE 0x00000001 /* Protection Enable */
-#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */
-#define X86_CR0_EM 0x00000004 /* Emulation */
-#define X86_CR0_TS 0x00000008 /* Task Switched */
-#define X86_CR0_ET 0x00000010 /* Extension Type */
-#define X86_CR0_NE 0x00000020 /* Numeric Error */
-#define X86_CR0_WP 0x00010000 /* Write Protect */
-#define X86_CR0_AM 0x00040000 /* Alignment Mask */
-#define X86_CR0_NW 0x20000000 /* Not Write-through */
-#define X86_CR0_CD 0x40000000 /* Cache Disable */
-#define X86_CR0_PG 0x80000000 /* Paging */
+#define X86_CR0_PE_BIT 0 /* Protection Enable */
+#define X86_CR0_PE _BITUL(X86_CR0_PE_BIT)
+#define X86_CR0_MP_BIT 1 /* Monitor Coprocessor */
+#define X86_CR0_MP _BITUL(X86_CR0_MP_BIT)
+#define X86_CR0_EM_BIT 2 /* Emulation */
+#define X86_CR0_EM _BITUL(X86_CR0_EM_BIT)
+#define X86_CR0_TS_BIT 3 /* Task Switched */
+#define X86_CR0_TS _BITUL(X86_CR0_TS_BIT)
+#define X86_CR0_ET_BIT 4 /* Extension Type */
+#define X86_CR0_ET _BITUL(X86_CR0_ET_BIT)
+#define X86_CR0_NE_BIT 5 /* Numeric Error */
+#define X86_CR0_NE _BITUL(X86_CR0_NE_BIT)
+#define X86_CR0_WP_BIT 16 /* Write Protect */
+#define X86_CR0_WP _BITUL(X86_CR0_WP_BIT)
+#define X86_CR0_AM_BIT 18 /* Alignment Mask */
+#define X86_CR0_AM _BITUL(X86_CR0_AM_BIT)
+#define X86_CR0_NW_BIT 29 /* Not Write-through */
+#define X86_CR0_NW _BITUL(X86_CR0_NW_BIT)
+#define X86_CR0_CD_BIT 30 /* Cache Disable */
+#define X86_CR0_CD _BITUL(X86_CR0_CD_BIT)
+#define X86_CR0_PG_BIT 31 /* Paging */
+#define X86_CR0_PG _BITUL(X86_CR0_PG_BIT)
/*
* Paging options in CR3
*/
-#define X86_CR3_PWT 0x00000008 /* Page Write Through */
-#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */
-#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
+#define X86_CR3_PWT_BIT 3 /* Page Write Through */
+#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
+#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
+#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
+#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
/*
* Intel CPU features in CR4
*/
-#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */
-#define X86_CR4_PVI 0x00000002 /* virtual interrupts flag enable */
-#define X86_CR4_TSD 0x00000004 /* disable time stamp at ipl 3 */
-#define X86_CR4_DE 0x00000008 /* enable debugging extensions */
-#define X86_CR4_PSE 0x00000010 /* enable page size extensions */
-#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */
-#define X86_CR4_MCE 0x00000040 /* Machine check enable */
-#define X86_CR4_PGE 0x00000080 /* enable global pages */
-#define X86_CR4_PCE 0x00000100 /* enable performance counters at ipl 3 */
-#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */
-#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
-#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
-#define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
-#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */
-#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
-#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
-#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */
+#define X86_CR4_VME_BIT 0 /* enable vm86 extensions */
+#define X86_CR4_VME _BITUL(X86_CR4_VME_BIT)
+#define X86_CR4_PVI_BIT 1 /* virtual interrupts flag enable */
+#define X86_CR4_PVI _BITUL(X86_CR4_PVI_BIT)
+#define X86_CR4_TSD_BIT 2 /* disable time stamp at ipl 3 */
+#define X86_CR4_TSD _BITUL(X86_CR4_TSD_BIT)
+#define X86_CR4_DE_BIT 3 /* enable debugging extensions */
+#define X86_CR4_DE _BITUL(X86_CR4_DE_BIT)
+#define X86_CR4_PSE_BIT 4 /* enable page size extensions */
+#define X86_CR4_PSE _BITUL(X86_CR4_PSE_BIT)
+#define X86_CR4_PAE_BIT 5 /* enable physical address extensions */
+#define X86_CR4_PAE _BITUL(X86_CR4_PAE_BIT)
+#define X86_CR4_MCE_BIT 6 /* Machine check enable */
+#define X86_CR4_MCE _BITUL(X86_CR4_MCE_BIT)
+#define X86_CR4_PGE_BIT 7 /* enable global pages */
+#define X86_CR4_PGE _BITUL(X86_CR4_PGE_BIT)
+#define X86_CR4_PCE_BIT 8 /* enable performance counters at ipl 3 */
+#define X86_CR4_PCE _BITUL(X86_CR4_PCE_BIT)
+#define X86_CR4_OSFXSR_BIT 9 /* enable fast FPU save and restore */
+#define X86_CR4_OSFXSR _BITUL(X86_CR4_OSFXSR_BIT)
+#define X86_CR4_OSXMMEXCPT_BIT 10 /* enable unmasked SSE exceptions */
+#define X86_CR4_OSXMMEXCPT _BITUL(X86_CR4_OSXMMEXCPT_BIT)
+#define X86_CR4_VMXE_BIT 13 /* enable VMX virtualization */
+#define X86_CR4_VMXE _BITUL(X86_CR4_VMXE_BIT)
+#define X86_CR4_SMXE_BIT 14 /* enable safer mode (TXT) */
+#define X86_CR4_SMXE _BITUL(X86_CR4_SMXE_BIT)
+#define X86_CR4_FSGSBASE_BIT 16 /* enable RDWRFSGS support */
+#define X86_CR4_FSGSBASE _BITUL(X86_CR4_FSGSBASE_BIT)
+#define X86_CR4_PCIDE_BIT 17 /* enable PCID support */
+#define X86_CR4_PCIDE _BITUL(X86_CR4_PCIDE_BIT)
+#define X86_CR4_OSXSAVE_BIT 18 /* enable xsave and xrestore */
+#define X86_CR4_OSXSAVE _BITUL(X86_CR4_OSXSAVE_BIT)
+#define X86_CR4_SMEP_BIT 20 /* enable SMEP support */
+#define X86_CR4_SMEP _BITUL(X86_CR4_SMEP_BIT)
+#define X86_CR4_SMAP_BIT 21 /* enable SMAP support */
+#define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT)
/*
* x86-64 Task Priority Register, CR8
*/
-#define X86_CR8_TPR 0x0000000F /* task priority register */
+#define X86_CR8_TPR _AC(0x0000000f,UL) /* task priority register */
/*
* AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h>
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7bd3bd31010..88d99ea7772 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -16,6 +16,8 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
+CFLAGS_irq.o := -I$(src)/../include/asm/trace
+
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
@@ -67,7 +69,7 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-y += kprobes/
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
+obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_VM86) += vm86_32.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
@@ -93,6 +95,7 @@ obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o
microcode-y := microcode_core.o
microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
+obj-$(CONFIG_MICROCODE_AMD_EARLY) += microcode_amd_early.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
@@ -102,6 +105,7 @@ obj-$(CONFIG_OF) += devicetree.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
+obj-$(CONFIG_TRACING) += tracepoint.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 904611bf0e5..99663b59123 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -35,6 +35,7 @@
#include <linux/smp.h>
#include <linux/mm.h>
+#include <asm/trace/irq_vectors.h>
#include <asm/irq_remapping.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
@@ -919,17 +920,35 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
/*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow.
+ *
+ * update_process_times() expects us to have done irq_enter().
+ * Besides, if we don't timer interrupts ignore the global
+ * interrupt lock, which is the WrongThing (tm) to do.
*/
- ack_APIC_irq();
+ entering_ack_irq();
+ local_apic_timer_interrupt();
+ exiting_irq();
+
+ set_irq_regs(old_regs);
+}
+
+void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
/*
+ * NOTE! We'd better ACK the irq immediately,
+ * because timer handling can be slow.
+ *
* update_process_times() expects us to have done irq_enter().
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
- irq_enter();
- exit_idle();
+ entering_ack_irq();
+ trace_local_timer_entry(LOCAL_TIMER_VECTOR);
local_apic_timer_interrupt();
- irq_exit();
+ trace_local_timer_exit(LOCAL_TIMER_VECTOR);
+ exiting_irq();
set_irq_regs(old_regs);
}
@@ -1907,12 +1926,10 @@ int __init APIC_init_uniprocessor(void)
/*
* This interrupt should _never_ happen with our APIC/SMP architecture
*/
-void smp_spurious_interrupt(struct pt_regs *regs)
+static inline void __smp_spurious_interrupt(void)
{
u32 v;
- irq_enter();
- exit_idle();
/*
* Check if this really is a spurious interrupt and ACK it
* if it is a vectored one. Just in case...
@@ -1927,13 +1944,28 @@ void smp_spurious_interrupt(struct pt_regs *regs)
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
pr_info("spurious APIC interrupt on CPU#%d, "
"should never happen.\n", smp_processor_id());
- irq_exit();
+}
+
+void smp_spurious_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ __smp_spurious_interrupt();
+ exiting_irq();
+}
+
+void smp_trace_spurious_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
+ __smp_spurious_interrupt();
+ trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
+ exiting_irq();
}
/*
* This interrupt should never happen with our APIC/SMP architecture
*/
-void smp_error_interrupt(struct pt_regs *regs)
+static inline void __smp_error_interrupt(struct pt_regs *regs)
{
u32 v0, v1;
u32 i = 0;
@@ -1948,8 +1980,6 @@ void smp_error_interrupt(struct pt_regs *regs)
"Illegal register address", /* APIC Error Bit 7 */
};
- irq_enter();
- exit_idle();
/* First tickle the hardware, only then report what went on. -- REW */
v0 = apic_read(APIC_ESR);
apic_write(APIC_ESR, 0);
@@ -1970,7 +2000,22 @@ void smp_error_interrupt(struct pt_regs *regs)
apic_printk(APIC_DEBUG, KERN_CONT "\n");
- irq_exit();
+}
+
+void smp_error_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ __smp_error_interrupt(regs);
+ exiting_irq();
+}
+
+void smp_trace_error_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ trace_error_apic_entry(ERROR_APIC_VECTOR);
+ __smp_error_interrupt(regs);
+ trace_error_apic_exit(ERROR_APIC_VECTOR);
+ exiting_irq();
}
/**
@@ -2302,7 +2347,7 @@ static void lapic_resume(void)
apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#if defined(CONFIG_X86_MCE_INTEL)
if (maxlvt >= 5)
apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
#endif
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 31cb9ae992b..a698d7165c9 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -9,6 +9,7 @@
*
*/
#include <asm/apic.h>
+#include <asm/nmi.h>
#include <linux/cpumask.h>
#include <linux/kdebug.h>
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 794f6eb54cd..39cc7f7acab 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -51,6 +51,8 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
static enum uv_system_type uv_system_type;
static u64 gru_start_paddr, gru_end_paddr;
+static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
+static u64 gru_dist_lmask, gru_dist_umask;
static union uvh_apicid uvh_apicid;
int uv_min_hub_revision_id;
EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
@@ -72,7 +74,20 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr)
static inline bool is_GRU_range(u64 start, u64 end)
{
- return start >= gru_start_paddr && end <= gru_end_paddr;
+ if (gru_dist_base) {
+ u64 su = start & gru_dist_umask; /* upper (incl pnode) bits */
+ u64 sl = start & gru_dist_lmask; /* base offset bits */
+ u64 eu = end & gru_dist_umask;
+ u64 el = end & gru_dist_lmask;
+
+ /* Must reside completely within a single GRU range */
+ return (sl == gru_dist_base && el == gru_dist_base &&
+ su >= gru_first_node_paddr &&
+ su <= gru_last_node_paddr &&
+ eu == su);
+ } else {
+ return start >= gru_start_paddr && end <= gru_end_paddr;
+ }
}
static bool uv_is_untracked_pat_range(u64 start, u64 end)
@@ -463,26 +478,63 @@ static __init void map_high(char *id, unsigned long base, int pshift,
pr_info("UV: Map %s_HI base address NULL\n", id);
return;
}
- pr_info("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
+ pr_debug("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
if (map_type == map_uc)
init_extra_mapping_uc(paddr, bytes);
else
init_extra_mapping_wb(paddr, bytes);
}
+static __init void map_gru_distributed(unsigned long c)
+{
+ union uvh_rh_gam_gru_overlay_config_mmr_u gru;
+ u64 paddr;
+ unsigned long bytes;
+ int nid;
+
+ gru.v = c;
+ /* only base bits 42:28 relevant in dist mode */
+ gru_dist_base = gru.v & 0x000007fff0000000UL;
+ if (!gru_dist_base) {
+ pr_info("UV: Map GRU_DIST base address NULL\n");
+ return;
+ }
+ bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+ gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1);
+ gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1);
+ gru_dist_base &= gru_dist_lmask; /* Clear bits above M */
+ for_each_online_node(nid) {
+ paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) |
+ gru_dist_base;
+ init_extra_mapping_wb(paddr, bytes);
+ gru_first_node_paddr = min(paddr, gru_first_node_paddr);
+ gru_last_node_paddr = max(paddr, gru_last_node_paddr);
+ }
+ /* Save upper (63:M) bits of address only for is_GRU_range */
+ gru_first_node_paddr &= gru_dist_umask;
+ gru_last_node_paddr &= gru_dist_umask;
+ pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n",
+ gru_dist_base, gru_first_node_paddr, gru_last_node_paddr);
+}
+
static __init void map_gru_high(int max_pnode)
{
union uvh_rh_gam_gru_overlay_config_mmr_u gru;
int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
- if (gru.s.enable) {
- map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
- gru_start_paddr = ((u64)gru.s.base << shift);
- gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
- } else {
+ if (!gru.s.enable) {
pr_info("UV: GRU disabled\n");
+ return;
+ }
+
+ if (is_uv3_hub() && gru.s3.mode) {
+ map_gru_distributed(gru.v);
+ return;
}
+ map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
+ gru_start_paddr = ((u64)gru.s.base << shift);
+ gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
}
static __init void map_mmr_high(int max_pnode)
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 0ef4bba2acb..d67c4be3e8b 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -28,7 +28,6 @@ void foo(void)
OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
- OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math);
OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b0684e4a73a..47b56a7e99c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,11 +31,15 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
+endif
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
endif
+
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4112be9a465..03445346ee0 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -17,15 +17,6 @@
#include <asm/paravirt.h>
#include <asm/alternative.h>
-static int __init no_387(char *s)
-{
- boot_cpu_data.hard_math = 0;
- write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0());
- return 1;
-}
-
-__setup("no387", no_387);
-
static double __initdata x = 4195835.0;
static double __initdata y = 3145727.0;
@@ -44,15 +35,6 @@ static void __init check_fpu(void)
{
s32 fdiv_bug;
- if (!boot_cpu_data.hard_math) {
-#ifndef CONFIG_MATH_EMULATION
- pr_emerg("No coprocessor found and no math emulation present\n");
- pr_emerg("Giving up\n");
- for (;;) ;
-#endif
- return;
- }
-
kernel_fpu_begin();
/*
@@ -107,5 +89,6 @@ void __init check_bugs(void)
* kernel_fpu_begin/end() in check_fpu() relies on the patched
* alternative instructions.
*/
- check_fpu();
+ if (cpu_has_fpu)
+ check_fpu();
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 22018f70a67..548bd039784 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -711,10 +711,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
return;
cpu_detect(c);
-
get_cpu_vendor(c);
-
get_cpu_cap(c);
+ fpu_detect(c);
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
@@ -724,6 +723,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_bsp_init)
this_cpu->c_bsp_init(c);
+
+ setup_force_cpu_cap(X86_FEATURE_ALWAYS);
}
void __init early_cpu_init(void)
@@ -1071,8 +1072,8 @@ __setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
-struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
- (unsigned long) nmi_idt_table };
+struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
+ (unsigned long) debug_idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);
@@ -1148,20 +1149,20 @@ int is_debug_stack(unsigned long addr)
addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
}
-static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
+DEFINE_PER_CPU(u32, debug_idt_ctr);
void debug_stack_set_zero(void)
{
- this_cpu_inc(debug_stack_use_ctr);
- load_idt((const struct desc_ptr *)&nmi_idt_descr);
+ this_cpu_inc(debug_idt_ctr);
+ load_current_idt();
}
void debug_stack_reset(void)
{
- if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
+ if (WARN_ON(!this_cpu_read(debug_idt_ctr)))
return;
- if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
- load_idt((const struct desc_ptr *)&idt_descr);
+ if (this_cpu_dec_return(debug_idt_ctr) == 0)
+ load_current_idt();
}
#else /* CONFIG_X86_64 */
@@ -1257,7 +1258,7 @@ void __cpuinit cpu_init(void)
switch_to_new_gdt(cpu);
loadsegment(fs, 0);
- load_idt((const struct desc_ptr *)&idt_descr);
+ load_current_idt();
memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
syscall_init();
@@ -1334,7 +1335,7 @@ void __cpuinit cpu_init(void)
if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
- load_idt(&idt_descr);
+ load_current_idt();
switch_to_new_gdt(cpu);
/*
@@ -1363,3 +1364,17 @@ void __cpuinit cpu_init(void)
fpu_init();
}
#endif
+
+#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
+void warn_pre_alternatives(void)
+{
+ WARN(1, "You're using static_cpu_has before alternatives have run!\n");
+}
+EXPORT_SYMBOL_GPL(warn_pre_alternatives);
+#endif
+
+inline bool __static_cpu_has_safe(u16 bit)
+{
+ return boot_cpu_has(bit);
+}
+EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index d048d5ca43c..7582f475b16 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -333,7 +333,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
switch (dir0_lsn) {
case 0xd: /* either a 486SLC or DLC w/o DEVID */
dir0_msn = 0;
- p = Cx486_name[(c->hard_math) ? 1 : 0];
+ p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
break;
case 0xe: /* a 486S A step */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 7c6f7d548c0..8dc72dda66f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -618,36 +618,34 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
* parameters cpuid leaf to find the cache details
*/
for (i = 0; i < num_cache_leaves; i++) {
- struct _cpuid4_info_regs this_leaf;
+ struct _cpuid4_info_regs this_leaf = {};
int retval;
retval = cpuid4_cache_lookup_regs(i, &this_leaf);
- if (retval >= 0) {
- switch (this_leaf.eax.split.level) {
- case 1:
- if (this_leaf.eax.split.type ==
- CACHE_TYPE_DATA)
- new_l1d = this_leaf.size/1024;
- else if (this_leaf.eax.split.type ==
- CACHE_TYPE_INST)
- new_l1i = this_leaf.size/1024;
- break;
- case 2:
- new_l2 = this_leaf.size/1024;
- num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
- index_msb = get_count_order(num_threads_sharing);
- l2_id = c->apicid & ~((1 << index_msb) - 1);
- break;
- case 3:
- new_l3 = this_leaf.size/1024;
- num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
- index_msb = get_count_order(
- num_threads_sharing);
- l3_id = c->apicid & ~((1 << index_msb) - 1);
- break;
- default:
- break;
- }
+ if (retval < 0)
+ continue;
+
+ switch (this_leaf.eax.split.level) {
+ case 1:
+ if (this_leaf.eax.split.type == CACHE_TYPE_DATA)
+ new_l1d = this_leaf.size/1024;
+ else if (this_leaf.eax.split.type == CACHE_TYPE_INST)
+ new_l1i = this_leaf.size/1024;
+ break;
+ case 2:
+ new_l2 = this_leaf.size/1024;
+ num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+ l2_id = c->apicid & ~((1 << index_msb) - 1);
+ break;
+ case 3:
+ new_l3 = this_leaf.size/1024;
+ num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+ l3_id = c->apicid & ~((1 << index_msb) - 1);
+ break;
+ default:
+ break;
}
}
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index ddc72f83933..5ac2d1fb28b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -153,7 +153,7 @@ static void raise_mce(struct mce *m)
return;
#ifdef CONFIG_X86_LOCAL_APIC
- if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
+ if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
unsigned long start;
int cpu;
@@ -167,7 +167,7 @@ static void raise_mce(struct mce *m)
cpumask_clear_cpu(cpu, mce_inject_cpumask);
}
if (!cpumask_empty(mce_inject_cpumask)) {
- if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
+ if (m->inject_flags & MCJ_IRQ_BROADCAST) {
/*
* don't wait because mce_irq_ipi is necessary
* to be sync with following raise_local
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index beb1f1689e5..e2703520d12 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -110,22 +110,17 @@ static struct severity {
/* known AR MCACODs: */
#ifdef CONFIG_MEMORY_FAILURE
MCESEV(
- KEEP, "HT thread notices Action required: data load error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
- MCGMASK(MCG_STATUS_EIPV, 0)
+ KEEP, "Action required but unaffected thread is continuable",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
+ MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
),
MCESEV(
- AR, "Action required: data load error",
+ AR, "Action required: data load error in a user process",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
USER
),
MCESEV(
- KEEP, "HT thread notices Action required: instruction fetch error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
- MCGMASK(MCG_STATUS_EIPV, 0)
- ),
- MCESEV(
- AR, "Action required: instruction fetch error",
+ AR, "Action required: instruction fetch error in a user process",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
USER
),
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9239504b41c..bf49cdbb010 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -89,7 +89,10 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
-/* MCA banks polled by the period polling timer for corrected events */
+/*
+ * MCA banks polled by the period polling timer for corrected events.
+ * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
+ */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index ae1697c2afe..d56405309dc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -24,6 +24,18 @@
* Also supports reliable discovery of shared banks.
*/
+/*
+ * CMCI can be delivered to multiple cpus that share a machine check bank
+ * so we need to designate a single cpu to process errors logged in each bank
+ * in the interrupt handler (otherwise we would have many races and potential
+ * double reporting of the same error).
+ * Note that this can change when a cpu is offlined or brought online since
+ * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
+ * disables CMCI on all banks owned by the cpu and clears this bitfield. At
+ * this point, cmci_rediscover() kicks in and a different cpu may end up
+ * taking ownership of some of the shared MCA banks that were previously
+ * owned by the offlined cpu.
+ */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
/*
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 47a1870279a..2f3a7995e56 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -29,6 +29,7 @@
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
+#include <asm/trace/irq_vectors.h>
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ)
@@ -378,15 +379,26 @@ static void unexpected_thermal_interrupt(void)
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
-asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+static inline void __smp_thermal_interrupt(void)
{
- irq_enter();
- exit_idle();
inc_irq_stat(irq_thermal_count);
smp_thermal_vector();
- irq_exit();
- /* Ack only at the end to avoid potential reentry */
- ack_APIC_irq();
+}
+
+asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ __smp_thermal_interrupt();
+ exiting_ack_irq();
+}
+
+asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
+ __smp_thermal_interrupt();
+ trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
+ exiting_ack_irq();
}
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index aa578cadb94..fe6b1c86645 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -8,6 +8,7 @@
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
+#include <asm/trace/irq_vectors.h>
static void default_threshold_interrupt(void)
{
@@ -17,13 +18,24 @@ static void default_threshold_interrupt(void)
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
-asmlinkage void smp_threshold_interrupt(void)
+static inline void __smp_threshold_interrupt(void)
{
- irq_enter();
- exit_idle();
inc_irq_stat(irq_threshold_count);
mce_threshold_vector();
- irq_exit();
- /* Ack only at the end to avoid potential reentry */
- ack_APIC_irq();
+}
+
+asmlinkage void smp_threshold_interrupt(void)
+{
+ entering_irq();
+ __smp_threshold_interrupt();
+ exiting_ack_irq();
+}
+
+asmlinkage void smp_trace_threshold_interrupt(void)
+{
+ entering_irq();
+ trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
+ __smp_threshold_interrupt();
+ trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
+ exiting_ack_irq();
}
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 35ffda5d072..5f90b85ff22 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -714,15 +714,15 @@ int __init mtrr_cleanup(unsigned address_bits)
if (mtrr_tom2)
x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;
- nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size);
/*
* [0, 1M) should always be covered by var mtrr with WB
* and fixed mtrrs should take effect before var mtrr for it:
*/
- nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
+ nr_range = add_range_with_merge(range, RANGE_NUM, 0, 0,
1ULL<<(20 - PAGE_SHIFT));
- /* Sort the ranges: */
- sort_range(range, nr_range);
+ /* add from var mtrr at last */
+ nr_range = x86_get_mtrr_mem_range(range, nr_range,
+ x_remove_base, x_remove_size);
range_sums = sum_ranges(range, nr_range);
printk(KERN_INFO "total RAM covered: %ldM\n",
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 68a3343e579..9e451b0876b 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -167,7 +167,7 @@ static void post_set(void)
setCx86(CX86_CCR3, ccr3);
/* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
+ write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
if (cpu_has_pge)
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fa72a39e5d4..d4cdfa67509 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -510,8 +510,9 @@ generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
static void generic_get_mtrr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type *type)
{
- unsigned int mask_lo, mask_hi, base_lo, base_hi;
- unsigned int tmp, hi;
+ u32 mask_lo, mask_hi, base_lo, base_hi;
+ unsigned int hi;
+ u64 tmp, mask;
/*
* get_mtrr doesn't need to update mtrr_state, also it could be called
@@ -532,18 +533,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
/* Work out the shifted address mask: */
- tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
- mask_lo = size_or_mask | tmp;
+ tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
+ mask = size_or_mask | tmp;
/* Expand tmp with high bits to all 1s: */
- hi = fls(tmp);
+ hi = fls64(tmp);
if (hi > 0) {
- tmp |= ~((1<<(hi - 1)) - 1);
+ tmp |= ~((1ULL<<(hi - 1)) - 1);
- if (tmp != mask_lo) {
+ if (tmp != mask) {
printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
- mask_lo = tmp;
+ mask = tmp;
}
}
@@ -551,8 +552,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
* This works correctly if size is a power of two, i.e. a
* contiguous range:
*/
- *size = -mask_lo;
- *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+ *size = -mask;
+ *base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
*type = base_lo & 0xff;
out_put_cpu:
@@ -701,7 +702,7 @@ static void post_set(void) __releases(set_atomicity_lock)
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
/* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
+ write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
if (cpu_has_pge)
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 726bf963c22..ca22b73aaa2 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -305,7 +305,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
return -EINVAL;
}
- if (base & size_or_mask || size & size_or_mask) {
+ if ((base | (base + size - 1)) >>
+ (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
pr_warning("mtrr: base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -583,6 +584,7 @@ static struct syscore_ops mtrr_syscore_ops = {
int __initdata changed_by_mtrr_cleanup;
+#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
*
@@ -600,7 +602,7 @@ void __init mtrr_bp_init(void)
if (cpu_has_mtrr) {
mtrr_if = &generic_mtrr_ops;
- size_or_mask = 0xff000000; /* 36 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(36);
size_and_mask = 0x00f00000;
phys_addr = 36;
@@ -619,7 +621,7 @@ void __init mtrr_bp_init(void)
boot_cpu_data.x86_mask == 0x4))
phys_addr = 36;
- size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
+ size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
size_and_mask = ~size_or_mask & 0xfffff00000ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
boot_cpu_data.x86 == 6) {
@@ -627,7 +629,7 @@ void __init mtrr_bp_init(void)
* VIA C* family have Intel style MTRRs,
* but don't support PAE
*/
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
phys_addr = 32;
}
@@ -637,21 +639,21 @@ void __init mtrr_bp_init(void)
if (cpu_has_k6_mtrr) {
/* Pre-Athlon (K6) AMD CPU MTRRs */
mtrr_if = mtrr_ops[X86_VENDOR_AMD];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
case X86_VENDOR_CENTAUR:
if (cpu_has_centaur_mcr) {
mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
case X86_VENDOR_CYRIX:
if (cpu_has_cyrix_arr) {
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1025f3c99d2..9e581c5cf6d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
* check that PEBS LBR correction does not conflict with
* whatever the user is asking with attr->branch_sample_type
*/
- if (event->attr.precise_ip > 1) {
+ if (event->attr.precise_ip > 1 &&
+ x86_pmu.intel_cap.pebs_format < 2) {
u64 *br_type = &event->attr.branch_sample_type;
if (has_branch_stack(event)) {
@@ -568,7 +569,7 @@ struct sched_state {
struct perf_sched {
int max_weight;
int max_events;
- struct event_constraint **constraints;
+ struct perf_event **events;
struct sched_state state;
int saved_states;
struct sched_state saved[SCHED_STATES_MAX];
@@ -577,7 +578,7 @@ struct perf_sched {
/*
* Initialize interator that runs through all events and counters.
*/
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
int num, int wmin, int wmax)
{
int idx;
@@ -585,10 +586,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
memset(sched, 0, sizeof(*sched));
sched->max_events = num;
sched->max_weight = wmax;
- sched->constraints = c;
+ sched->events = events;
for (idx = 0; idx < num; idx++) {
- if (c[idx]->weight == wmin)
+ if (events[idx]->hw.constraint->weight == wmin)
break;
}
@@ -635,8 +636,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
if (sched->state.event >= sched->max_events)
return false;
- c = sched->constraints[sched->state.event];
-
+ c = sched->events[sched->state.event]->hw.constraint;
/* Prefer fixed purpose counters */
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
@@ -694,7 +694,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
if (sched->state.weight > sched->max_weight)
return false;
}
- c = sched->constraints[sched->state.event];
+ c = sched->events[sched->state.event]->hw.constraint;
} while (c->weight != sched->state.weight);
sched->state.counter = 0; /* start with first counter */
@@ -705,12 +705,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
/*
* Assign a counter for each event.
*/
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
int wmin, int wmax, int *assign)
{
struct perf_sched sched;
- perf_sched_init(&sched, constraints, n, wmin, wmax);
+ perf_sched_init(&sched, events, n, wmin, wmax);
do {
if (!perf_sched_find_counter(&sched))
@@ -724,16 +724,19 @@ int perf_assign_events(struct event_constraint **constraints, int n,
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
- struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+ struct event_constraint *c;
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ struct perf_event *e;
int i, wmin, wmax, num = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+ hwc = &cpuc->event_list[i]->hw;
c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
- constraints[i] = c;
+ hwc->constraint = c;
+
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
@@ -743,7 +746,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
*/
for (i = 0; i < n; i++) {
hwc = &cpuc->event_list[i]->hw;
- c = constraints[i];
+ c = hwc->constraint;
/* never assigned */
if (hwc->idx == -1)
@@ -764,16 +767,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
/* slow path */
if (i != n)
- num = perf_assign_events(constraints, n, wmin, wmax, assign);
+ num = perf_assign_events(cpuc->event_list, n, wmin,
+ wmax, assign);
/*
+ * Mark the event as committed, so we do not put_constraint()
+ * in case new events are added and fail scheduling.
+ */
+ if (!num && assign) {
+ for (i = 0; i < n; i++) {
+ e = cpuc->event_list[i];
+ e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+ }
+ }
+ /*
* scheduling failed or is just a simulation,
* free resources if necessary
*/
if (!assign || num) {
for (i = 0; i < n; i++) {
+ e = cpuc->event_list[i];
+ /*
+ * do not put_constraint() on comitted events,
+ * because they are good to go
+ */
+ if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
+ continue;
+
if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+ x86_pmu.put_event_constraints(cpuc, e);
}
}
return num ? -EINVAL : 0;
@@ -1153,6 +1175,11 @@ static void x86_pmu_del(struct perf_event *event, int flags)
int i;
/*
+ * event is descheduled
+ */
+ event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
+
+ /*
* If we're called during a txn, we don't need to do anything.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
@@ -1249,10 +1276,20 @@ void perf_events_lapic_init(void)
static int __kprobes
perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
+ int ret;
+ u64 start_clock;
+ u64 finish_clock;
+
if (!atomic_read(&active_events))
return NMI_DONE;
- return x86_pmu.handle_irq(regs);
+ start_clock = local_clock();
+ ret = x86_pmu.handle_irq(regs);
+ finish_clock = local_clock();
+
+ perf_sample_event_took(finish_clock - start_clock);
+
+ return ret;
}
struct event_constraint emptyconstraint;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ba9aadfa683..97e557bc4c9 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -63,10 +63,12 @@ struct event_constraint {
int flags;
};
/*
- * struct event_constraint flags
+ * struct hw_perf_event.flags flags
*/
#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */
+#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -227,11 +229,14 @@ struct cpu_hw_events {
* - inv
* - edge
* - cnt-mask
+ * - in_tx
+ * - in_tx_checkpointed
* The other filters are supported by fixed counters.
* The any-thread option is supported starting with v3.
*/
+#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
#define FIXED_EVENT_CONSTRAINT(c, n) \
- EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+ EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
/*
* Constraint on the Event code + UMask
@@ -247,6 +252,11 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+/* DataLA version of store sampling without extra enable bit. */
+#define INTEL_PST_HSW_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
#define EVENT_CONSTRAINT_END \
EVENT_CONSTRAINT(0, 0, 0)
@@ -301,6 +311,11 @@ union perf_capabilities {
u64 pebs_arch_reg:1;
u64 pebs_format:4;
u64 smm_freeze:1;
+ /*
+ * PMU supports separate counter range for writing
+ * values > 32bit.
+ */
+ u64 full_width_write:1;
};
u64 capabilities;
};
@@ -375,6 +390,7 @@ struct x86_pmu {
struct event_constraint *event_constraints;
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
+ bool late_ack;
/*
* sysfs attrs
@@ -528,7 +544,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
void x86_pmu_enable_all(int added);
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
int wmin, int wmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
@@ -633,6 +649,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
extern struct event_constraint intel_ivb_pebs_event_constraints[];
+extern struct event_constraint intel_hsw_pebs_event_constraints[];
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_enable(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 7e28d9467bb..4cbe03287b0 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -648,48 +648,48 @@ static __initconst const struct x86_pmu amd_pmu = {
.cpu_dead = amd_pmu_cpu_dead,
};
-static int setup_event_constraints(void)
+static int __init amd_core_pmu_init(void)
{
- if (boot_cpu_data.x86 == 0x15)
+ if (!cpu_has_perfctr_core)
+ return 0;
+
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ pr_cont("Fam15h ");
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
- return 0;
-}
+ break;
-static int setup_perfctr_core(void)
-{
- if (!cpu_has_perfctr_core) {
- WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
- KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
+ default:
+ pr_err("core perfctr but no constraints; unknown hardware!\n");
return -ENODEV;
}
- WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
- KERN_ERR "hw perf events core counters need constraints handler!");
-
/*
* If core performance counter extensions exists, we must use
* MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
- * x86_pmu_addr_offset().
+ * amd_pmu_addr_offset().
*/
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
- printk(KERN_INFO "perf: AMD core performance counters detected\n");
-
+ pr_cont("core perfctr, ");
return 0;
}
__init int amd_pmu_init(void)
{
+ int ret;
+
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;
x86_pmu = amd_pmu;
- setup_event_constraints();
- setup_perfctr_core();
+ ret = amd_core_pmu_init();
+ if (ret)
+ return ret;
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
new file mode 100644
index 00000000000..0db655ef391
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+
+#include "perf_event.h"
+#include "perf_event_amd_iommu.h"
+
+#define COUNTER_SHIFT 16
+
+#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8))
+#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg))
+
+/* iommu pmu config masks */
+#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL))
+#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL)
+#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL)
+#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL)
+#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL)
+#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
+#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
+
+static struct perf_amd_iommu __perf_iommu;
+
+struct perf_amd_iommu {
+ struct pmu pmu;
+ u8 max_banks;
+ u8 max_counters;
+ u64 cntr_assign_mask;
+ raw_spinlock_t lock;
+ const struct attribute_group *attr_groups[4];
+};
+
+#define format_group attr_groups[0]
+#define cpumask_group attr_groups[1]
+#define events_group attr_groups[2]
+#define null_group attr_groups[3]
+
+/*---------------------------------------------
+ * sysfs format attributes
+ *---------------------------------------------*/
+PMU_FORMAT_ATTR(csource, "config:0-7");
+PMU_FORMAT_ATTR(devid, "config:8-23");
+PMU_FORMAT_ATTR(pasid, "config:24-39");
+PMU_FORMAT_ATTR(domid, "config:40-55");
+PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
+PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+
+static struct attribute *iommu_format_attrs[] = {
+ &format_attr_csource.attr,
+ &format_attr_devid.attr,
+ &format_attr_pasid.attr,
+ &format_attr_domid.attr,
+ &format_attr_devid_mask.attr,
+ &format_attr_pasid_mask.attr,
+ &format_attr_domid_mask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_iommu_format_group = {
+ .name = "format",
+ .attrs = iommu_format_attrs,
+};
+
+/*---------------------------------------------
+ * sysfs events attributes
+ *---------------------------------------------*/
+struct amd_iommu_event_desc {
+ struct kobj_attribute attr;
+ const char *event;
+};
+
+static ssize_t _iommu_event_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amd_iommu_event_desc *event =
+ container_of(attr, struct amd_iommu_event_desc, attr);
+ return sprintf(buf, "%s\n", event->event);
+}
+
+#define AMD_IOMMU_EVENT_DESC(_name, _event) \
+{ \
+ .attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \
+ .event = _event, \
+}
+
+static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
+ AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"),
+ AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"),
+ AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"),
+ AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"),
+ AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"),
+ AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"),
+ AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"),
+ AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"),
+ AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"),
+ AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"),
+ AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"),
+ AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"),
+ { /* end: all zeroes */ },
+};
+
+/*---------------------------------------------
+ * sysfs cpumask attributes
+ *---------------------------------------------*/
+static cpumask_t iommu_cpumask;
+
+static ssize_t _iommu_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &iommu_cpumask);
+ buf[n++] = '\n';
+ buf[n] = '\0';
+ return n;
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
+
+static struct attribute *iommu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_iommu_cpumask_group = {
+ .attrs = iommu_cpumask_attrs,
+};
+
+/*---------------------------------------------*/
+
+static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+{
+ unsigned long flags;
+ int shift, bank, cntr, retval;
+ int max_banks = perf_iommu->max_banks;
+ int max_cntrs = perf_iommu->max_counters;
+
+ raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+
+ for (bank = 0, shift = 0; bank < max_banks; bank++) {
+ for (cntr = 0; cntr < max_cntrs; cntr++) {
+ shift = bank + (bank*3) + cntr;
+ if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+ continue;
+ } else {
+ perf_iommu->cntr_assign_mask |= (1ULL<<shift);
+ retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+ goto out;
+ }
+ }
+ }
+ retval = -ENOSPC;
+out:
+ raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+ return retval;
+}
+
+static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
+ u8 bank, u8 cntr)
+{
+ unsigned long flags;
+ int max_banks, max_cntrs;
+ int shift = 0;
+
+ max_banks = perf_iommu->max_banks;
+ max_cntrs = perf_iommu->max_counters;
+
+ if ((bank > max_banks) || (cntr > max_cntrs))
+ return -EINVAL;
+
+ shift = bank + cntr + (bank*3);
+
+ raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+ perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
+ raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+
+ return 0;
+}
+
+static int perf_iommu_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_amd_iommu *perf_iommu;
+ u64 config, config1;
+
+ /* test the event attr type check for PMU enumeration */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * IOMMU counters are shared across all cores.
+ * Therefore, it does not support per-process mode.
+ * Also, it does not support event sampling mode.
+ */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ /* IOMMU counters do not have usr/os/guest/host bits */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ perf_iommu = &__perf_iommu;
+
+ if (event->pmu != &perf_iommu->pmu)
+ return -ENOENT;
+
+ if (perf_iommu) {
+ config = event->attr.config;
+ config1 = event->attr.config1;
+ } else {
+ return -EINVAL;
+ }
+
+ /* integrate with iommu base devid (0000), assume one iommu */
+ perf_iommu->max_banks =
+ amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
+ perf_iommu->max_counters =
+ amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
+ if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
+ return -EINVAL;
+
+ /* update the hw_perf_event struct with the iommu config data */
+ hwc->config = config;
+ hwc->extra_reg.config = config1;
+
+ return 0;
+}
+
+static void perf_iommu_enable_event(struct perf_event *ev)
+{
+ u8 csource = _GET_CSOURCE(ev);
+ u16 devid = _GET_DEVID(ev);
+ u64 reg = 0ULL;
+
+ reg = csource;
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+
+ reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+
+ reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_PASID_MATCH_REG, &reg, true);
+
+ reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+}
+
+static void perf_iommu_disable_event(struct perf_event *event)
+{
+ u64 reg = 0ULL;
+
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+}
+
+static void perf_iommu_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ pr_debug("perf: amd_iommu:perf_iommu_start\n");
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ if (flags & PERF_EF_RELOAD) {
+ u64 prev_raw_count = local64_read(&hwc->prev_count);
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+ }
+
+ perf_iommu_enable_event(event);
+ perf_event_update_userpage(event);
+
+}
+
+static void perf_iommu_read(struct perf_event *event)
+{
+ u64 count = 0ULL;
+ u64 prev_raw_count = 0ULL;
+ u64 delta = 0ULL;
+ struct hw_perf_event *hwc = &event->hw;
+ pr_debug("perf: amd_iommu:perf_iommu_read\n");
+
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_REG, &count, false);
+
+ /* IOMMU pc counter register is only 48 bits */
+ count &= 0xFFFFFFFFFFFFULL;
+
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ count) != prev_raw_count)
+ return;
+
+ /* Handling 48-bit counter overflowing */
+ delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+ delta >>= COUNTER_SHIFT;
+ local64_add(delta, &event->count);
+
+}
+
+static void perf_iommu_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 config;
+
+ pr_debug("perf: amd_iommu:perf_iommu_stop\n");
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ perf_iommu_disable_event(event);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ config = hwc->config;
+ perf_iommu_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_iommu_add(struct perf_event *event, int flags)
+{
+ int retval;
+ struct perf_amd_iommu *perf_iommu =
+ container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+ pr_debug("perf: amd_iommu:perf_iommu_add\n");
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ /* request an iommu bank/counter */
+ retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
+ if (retval != -ENOSPC)
+ event->hw.extra_reg.reg = (u16)retval;
+ else
+ return retval;
+
+ if (flags & PERF_EF_START)
+ perf_iommu_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void perf_iommu_del(struct perf_event *event, int flags)
+{
+ struct perf_amd_iommu *perf_iommu =
+ container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+ pr_debug("perf: amd_iommu:perf_iommu_del\n");
+ perf_iommu_stop(event, PERF_EF_UPDATE);
+
+ /* clear the assigned iommu bank/counter */
+ clear_avail_iommu_bnk_cntr(perf_iommu,
+ _GET_BANK(event),
+ _GET_CNTR(event));
+
+ perf_event_update_userpage(event);
+}
+
+static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+{
+ struct attribute **attrs;
+ struct attribute_group *attr_group;
+ int i = 0, j;
+
+ while (amd_iommu_v2_event_descs[i].attr.attr.name)
+ i++;
+
+ attr_group = kzalloc(sizeof(struct attribute *)
+ * (i + 1) + sizeof(*attr_group), GFP_KERNEL);
+ if (!attr_group)
+ return -ENOMEM;
+
+ attrs = (struct attribute **)(attr_group + 1);
+ for (j = 0; j < i; j++)
+ attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
+
+ attr_group->name = "events";
+ attr_group->attrs = attrs;
+ perf_iommu->events_group = attr_group;
+
+ return 0;
+}
+
+static __init void amd_iommu_pc_exit(void)
+{
+ if (__perf_iommu.events_group != NULL) {
+ kfree(__perf_iommu.events_group);
+ __perf_iommu.events_group = NULL;
+ }
+}
+
+static __init int _init_perf_amd_iommu(
+ struct perf_amd_iommu *perf_iommu, char *name)
+{
+ int ret;
+
+ raw_spin_lock_init(&perf_iommu->lock);
+
+ /* Init format attributes */
+ perf_iommu->format_group = &amd_iommu_format_group;
+
+ /* Init cpumask attributes to only core 0 */
+ cpumask_set_cpu(0, &iommu_cpumask);
+ perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
+
+ /* Init events attributes */
+ if (_init_events_attrs(perf_iommu) != 0)
+ pr_err("perf: amd_iommu: Only support raw events.\n");
+
+ /* Init null attributes */
+ perf_iommu->null_group = NULL;
+ perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+
+ ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
+ if (ret) {
+ pr_err("perf: amd_iommu: Failed to initialized.\n");
+ amd_iommu_pc_exit();
+ } else {
+ pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
+ amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
+ amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+ }
+
+ return ret;
+}
+
+static struct perf_amd_iommu __perf_iommu = {
+ .pmu = {
+ .event_init = perf_iommu_event_init,
+ .add = perf_iommu_add,
+ .del = perf_iommu_del,
+ .start = perf_iommu_start,
+ .stop = perf_iommu_stop,
+ .read = perf_iommu_read,
+ },
+ .max_banks = 0x00,
+ .max_counters = 0x00,
+ .cntr_assign_mask = 0ULL,
+ .format_group = NULL,
+ .cpumask_group = NULL,
+ .events_group = NULL,
+ .null_group = NULL,
+};
+
+static __init int amd_iommu_pc_init(void)
+{
+ /* Make sure the IOMMU PC resource is available */
+ if (!amd_iommu_pc_supported()) {
+ pr_err("perf: amd_iommu PMU not installed. No support!\n");
+ return -ENODEV;
+ }
+
+ _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+
+ return 0;
+}
+
+device_initcall(amd_iommu_pc_init);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
new file mode 100644
index 00000000000..845d173278e
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PERF_EVENT_AMD_IOMMU_H_
+#define _PERF_EVENT_AMD_IOMMU_H_
+
+/* iommu pc mmio region register indexes */
+#define IOMMU_PC_COUNTER_REG 0x00
+#define IOMMU_PC_COUNTER_SRC_REG 0x08
+#define IOMMU_PC_PASID_MATCH_REG 0x10
+#define IOMMU_PC_DOMID_MATCH_REG 0x18
+#define IOMMU_PC_DEVID_MATCH_REG 0x20
+#define IOMMU_PC_COUNTER_REPORT_REG 0x28
+
+/* maximun specified bank/counters */
+#define PC_MAX_SPEC_BNKS 64
+#define PC_MAX_SPEC_CNTRS 16
+
+/* iommu pc reg masks*/
+#define IOMMU_BASE_DEVID 0x0000
+
+/* amd_iommu_init.c external support functions */
+extern bool amd_iommu_pc_supported(void);
+
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+
+extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
+ u8 fxn, u64 *value, bool is_write);
+
+#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f60d41ff9a9..fbc9210b45b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/export.h>
+#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>
@@ -165,13 +166,13 @@ static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
- INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
@@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = {
NULL,
};
+static struct event_constraint intel_hsw_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+ INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
+ /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+ INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
+ /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+ INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
@@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
return true;
/* implicit branch sampling to correct PEBS skid */
- if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+ if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
+ x86_pmu.intel_cap.pebs_format < 2)
return true;
return false;
@@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
cpuc = &__get_cpu_var(cpu_hw_events);
/*
- * Some chipsets need to unmask the LVTPC in a particular spot
- * inside the nmi handler. As a result, the unmasking was pushed
- * into all the nmi handlers.
- *
- * This handler doesn't seem to have any issues with the unmasking
- * so it was left at the top.
+ * No known reason to not always do late ACK,
+ * but just in case do it opt-in.
*/
- apic_write(APIC_LVTPC, APIC_DM_NMI);
-
+ if (!x86_pmu.late_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
intel_pmu_disable_all();
handled = intel_pmu_drain_bts_buffer();
status = intel_pmu_get_status();
@@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
again:
intel_pmu_ack_status(status);
if (++loops > 100) {
- WARN_ONCE(1, "perfevents: irq loop stuck!\n");
- perf_event_print_debug();
+ static bool warned = false;
+ if (!warned) {
+ WARN(1, "perfevents: irq loop stuck!\n");
+ perf_event_print_debug();
+ warned = true;
+ }
intel_pmu_reset();
goto done;
}
@@ -1235,6 +1253,13 @@ again:
done:
intel_pmu_enable_all(0);
+ /*
+ * Only unmask the NMI after the overflow counters
+ * have been reset. This avoids spurious NMIs on
+ * Haswell CPUs.
+ */
+ if (x86_pmu.late_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
return handled;
}
@@ -1425,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
if ((event->hw.config & c->cmask) == c->code) {
- /* hw.flags zeroed at initialization */
event->hw.flags |= c->flags;
return c;
}
@@ -1473,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
- event->hw.flags = 0;
intel_put_shared_regs_event_constraints(cpuc, event);
}
@@ -1646,6 +1669,47 @@ static void core_pmu_enable_all(int added)
}
}
+static int hsw_hw_config(struct perf_event *event)
+{
+ int ret = intel_pmu_hw_config(event);
+
+ if (ret)
+ return ret;
+ if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+ return 0;
+ event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+
+ /*
+ * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
+ * PEBS or in ANY thread mode. Since the results are non-sensical forbid
+ * this combination.
+ */
+ if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
+ ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+ event->attr.precise_ip > 0))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static struct event_constraint counter2_constraint =
+ EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ struct event_constraint *c = intel_get_event_constraints(cpuc, event);
+
+ /* Handle special quirk on in_tx_checkpointed only in counter 2 */
+ if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
+ if (c->idxmsk64 & (1U << 2))
+ return &counter2_constraint;
+ return &emptyconstraint;
+ }
+
+ return c;
+}
+
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -1653,6 +1717,8 @@ PMU_FORMAT_ATTR(pc, "config:19" );
PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
PMU_FORMAT_ATTR(inv, "config:23" );
PMU_FORMAT_ATTR(cmask, "config:24-31" );
+PMU_FORMAT_ATTR(in_tx, "config:32");
+PMU_FORMAT_ATTR(in_tx_cp, "config:33");
static struct attribute *intel_arch_formats_attr[] = {
&format_attr_event.attr,
@@ -1807,6 +1873,8 @@ static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_any.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,
+ &format_attr_in_tx.attr,
+ &format_attr_in_tx_cp.attr,
&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
&format_attr_ldlat.attr, /* PEBS load latency */
@@ -1966,6 +2034,15 @@ static __init void intel_nehalem_quirk(void)
}
}
+EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
+
+static struct attribute *hsw_events_attrs[] = {
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_hsw),
+ NULL
+};
+
__init int intel_pmu_init(void)
{
union cpuid10_edx edx;
@@ -2189,6 +2266,30 @@ __init int intel_pmu_init(void)
break;
+ case 60: /* Haswell Client */
+ case 70:
+ case 71:
+ case 63:
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_snb();
+
+ x86_pmu.event_constraints = intel_hsw_event_constraints;
+ x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_snb_extra_regs;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.er_flags |= ERF_HAS_RSP_1;
+ x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.cpu_events = hsw_events_attrs;
+ pr_cont("Haswell events, ");
+ break;
+
default:
switch (x86_pmu.version) {
case 1:
@@ -2227,7 +2328,7 @@ __init int intel_pmu_init(void)
* counter, so do not extend mask to generic counters
*/
for_each_event_constraint(c, x86_pmu.event_constraints) {
- if (c->cmask != X86_RAW_EVENT_MASK
+ if (c->cmask != FIXED_EVENT_FLAGS
|| c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
continue;
}
@@ -2237,5 +2338,12 @@ __init int intel_pmu_init(void)
}
}
+ /* Support full width counters using alternative MSR range */
+ if (x86_pmu.intel_cap.full_width_write) {
+ x86_pmu.max_period = x86_pmu.cntval_mask;
+ x86_pmu.perfctr = MSR_IA32_PMC0;
+ pr_cont("full-width counters, ");
+ }
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 60250f68705..3065c57a63c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status)
return val;
}
+static u64 precise_store_data_hsw(u64 status)
+{
+ union perf_mem_data_src dse;
+
+ dse.val = 0;
+ dse.mem_op = PERF_MEM_OP_STORE;
+ dse.mem_lvl = PERF_MEM_LVL_NA;
+ if (status & 1)
+ dse.mem_lvl = PERF_MEM_LVL_L1;
+ /* Nothing else supported. Sorry. */
+ return dse.val;
+}
+
static u64 load_latency_data(u64 status)
{
union intel_x86_pebs_dse dse;
@@ -165,6 +178,22 @@ struct pebs_record_nhm {
u64 status, dla, dse, lat;
};
+/*
+ * Same as pebs_record_nhm, with two additional fields.
+ */
+struct pebs_record_hsw {
+ struct pebs_record_nhm nhm;
+ /*
+ * Real IP of the event. In the Intel documentation this
+ * is called eventingrip.
+ */
+ u64 real_ip;
+ /*
+ * TSX tuning information field: abort cycles and abort flags.
+ */
+ u64 tsx_tuning;
+};
+
void init_debug_store_on_cpu(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -548,6 +577,42 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_hsw_pebs_event_constraints[] = {
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
+ INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
+ INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
+ /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+ INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
+ /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+ INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+ INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+ /* MEM_UOPS_RETIRED.SPLIT_STORES */
+ INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+ INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+ INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
+ INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
+ INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
+ /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
+ INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
+ /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
+ INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
+ /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
+ INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
+ /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
+ INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
+ INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
+
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;
@@ -588,6 +653,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+
+ if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+ cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
+ else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+ cpuc->pebs_enabled &= ~(1ULL << 63);
+
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -697,6 +768,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
*/
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct pebs_record_nhm *pebs = __pebs;
+ struct pebs_record_hsw *pebs_hsw = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
u64 sample_type;
@@ -706,7 +778,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
return;
fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
- fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
+ fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
+ PERF_X86_EVENT_PEBS_ST_HSW);
perf_sample_data_init(&data, 0, event->hw.last_period);
@@ -717,9 +790,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* if PEBS-LL or PreciseStore
*/
if (fll || fst) {
- if (sample_type & PERF_SAMPLE_ADDR)
- data.addr = pebs->dla;
-
/*
* Use latency for weight (only avail with PEBS-LL)
*/
@@ -732,6 +802,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
if (sample_type & PERF_SAMPLE_DATA_SRC) {
if (fll)
data.data_src.val = load_latency_data(pebs->dse);
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+ data.data_src.val =
+ precise_store_data_hsw(pebs->dse);
else
data.data_src.val = precise_store_data(pebs->dse);
}
@@ -753,11 +826,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.bp = pebs->bp;
regs.sp = pebs->sp;
- if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
+ if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
+ regs.ip = pebs_hsw->real_ip;
+ regs.flags |= PERF_EFLAGS_EXACT;
+ } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
regs.flags |= PERF_EFLAGS_EXACT;
else
regs.flags &= ~PERF_EFLAGS_EXACT;
+ if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
+ x86_pmu.intel_cap.pebs_format >= 1)
+ data.addr = pebs->dla;
+
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
@@ -806,35 +886,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
__intel_pmu_pebs_event(event, iregs, at);
}
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
+ void *top)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct debug_store *ds = cpuc->ds;
- struct pebs_record_nhm *at, *top;
struct perf_event *event = NULL;
u64 status = 0;
- int bit, n;
-
- if (!x86_pmu.pebs_active)
- return;
-
- at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
- top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+ int bit;
ds->pebs_index = ds->pebs_buffer_base;
- n = top - at;
- if (n <= 0)
- return;
-
- /*
- * Should not happen, we program the threshold at 1 and do not
- * set a reset value.
- */
- WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
+ for (; at < top; at += x86_pmu.pebs_record_size) {
+ struct pebs_record_nhm *p = at;
- for ( ; at < top; at++) {
- for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
+ for_each_set_bit(bit, (unsigned long *)&p->status,
+ x86_pmu.max_pebs_events) {
event = cpuc->events[bit];
if (!test_bit(bit, cpuc->active_mask))
continue;
@@ -857,6 +924,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
}
}
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct pebs_record_nhm *at, *top;
+ int n;
+
+ if (!x86_pmu.pebs_active)
+ return;
+
+ at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+ ds->pebs_index = ds->pebs_buffer_base;
+
+ n = top - at;
+ if (n <= 0)
+ return;
+
+ /*
+ * Should not happen, we program the threshold at 1 and do not
+ * set a reset value.
+ */
+ WARN_ONCE(n > x86_pmu.max_pebs_events,
+ "Unexpected number of pebs records %d\n", n);
+
+ return __intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
+static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct pebs_record_hsw *at, *top;
+ int n;
+
+ if (!x86_pmu.pebs_active)
+ return;
+
+ at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
+
+ n = top - at;
+ if (n <= 0)
+ return;
+ /*
+ * Should not happen, we program the threshold at 1 and do not
+ * set a reset value.
+ */
+ WARN_ONCE(n > x86_pmu.max_pebs_events,
+ "Unexpected number of pebs records %d\n", n);
+
+ return __intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
/*
* BTS, PEBS probe and setup
*/
@@ -888,6 +1010,12 @@ void intel_ds_init(void)
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
break;
+ case 2:
+ pr_cont("PEBS fmt2%c, ", pebs_type);
+ x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+ break;
+
default:
printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
x86_pmu.pebs = 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d978353c939..d5be06a5005 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -12,6 +12,16 @@ enum {
LBR_FORMAT_LIP = 0x01,
LBR_FORMAT_EIP = 0x02,
LBR_FORMAT_EIP_FLAGS = 0x03,
+ LBR_FORMAT_EIP_FLAGS2 = 0x04,
+ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2,
+};
+
+static enum {
+ LBR_EIP_FLAGS = 1,
+ LBR_TSX = 2,
+} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
+ [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
+ [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
};
/*
@@ -56,6 +66,8 @@ enum {
LBR_FAR)
#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
+#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
+#define LBR_FROM_FLAG_ABORT (1ULL << 61)
#define for_each_branch_sample_type(x) \
for ((x) = PERF_SAMPLE_BRANCH_USER; \
@@ -81,9 +93,13 @@ enum {
X86_BR_JMP = 1 << 9, /* jump */
X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+ X86_BR_ABORT = 1 << 12,/* transaction abort */
+ X86_BR_IN_TX = 1 << 13,/* in transaction */
+ X86_BR_NO_TX = 1 << 14,/* not in transaction */
};
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
#define X86_BR_ANY \
(X86_BR_CALL |\
@@ -95,6 +111,7 @@ enum {
X86_BR_JCC |\
X86_BR_JMP |\
X86_BR_IRQ |\
+ X86_BR_ABORT |\
X86_BR_IND_CALL)
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
unsigned long lbr_idx = (tos - i) & mask;
- u64 from, to, mis = 0, pred = 0;
+ u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
+ int skip = 0;
+ int lbr_flags = lbr_desc[lbr_format];
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
- if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
+ if (lbr_flags & LBR_EIP_FLAGS) {
mis = !!(from & LBR_FROM_FLAG_MISPRED);
pred = !mis;
- from = (u64)((((s64)from) << 1) >> 1);
+ skip = 1;
+ }
+ if (lbr_flags & LBR_TSX) {
+ in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+ abort = !!(from & LBR_FROM_FLAG_ABORT);
+ skip = 3;
}
+ from = (u64)((((s64)from) << skip) >> skip);
cpuc->lbr_entries[i].from = from;
cpuc->lbr_entries[i].to = to;
cpuc->lbr_entries[i].mispred = mis;
cpuc->lbr_entries[i].predicted = pred;
+ cpuc->lbr_entries[i].in_tx = in_tx;
+ cpuc->lbr_entries[i].abort = abort;
cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
@@ -310,7 +337,7 @@ void intel_pmu_lbr_read(void)
* - in case there is no HW filter
* - in case the HW filter has errata or limitations
*/
-static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
u64 br_type = event->attr.branch_sample_type;
int mask = 0;
@@ -318,11 +345,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
if (br_type & PERF_SAMPLE_BRANCH_USER)
mask |= X86_BR_USER;
- if (br_type & PERF_SAMPLE_BRANCH_KERNEL) {
- if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
mask |= X86_BR_KERNEL;
- }
/* we ignore BRANCH_HV here */
@@ -337,13 +361,21 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
mask |= X86_BR_IND_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
+ mask |= X86_BR_ABORT;
+
+ if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
+ mask |= X86_BR_IN_TX;
+
+ if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
+ mask |= X86_BR_NO_TX;
+
/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
*/
event->hw.branch_reg.reg = mask;
-
- return 0;
}
/*
@@ -391,9 +423,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
/*
* setup SW LBR filter
*/
- ret = intel_pmu_setup_sw_lbr_filter(event);
- if (ret)
- return ret;
+ intel_pmu_setup_sw_lbr_filter(event);
/*
* setup HW LBR filter, if any
@@ -415,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
* decoded (e.g., text page not present), then X86_BR_NONE is
* returned.
*/
-static int branch_type(unsigned long from, unsigned long to)
+static int branch_type(unsigned long from, unsigned long to, int abort)
{
struct insn insn;
void *addr;
@@ -435,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to)
if (from == 0 || to == 0)
return X86_BR_NONE;
+ if (abort)
+ return X86_BR_ABORT | to_plm;
+
if (from_plm == X86_BR_USER) {
/*
* can happen if measuring at the user level only
@@ -581,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
from = cpuc->lbr_entries[i].from;
to = cpuc->lbr_entries[i].to;
- type = branch_type(from, to);
+ type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+ if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
+ if (cpuc->lbr_entries[i].in_tx)
+ type |= X86_BR_IN_TX;
+ else
+ type |= X86_BR_NO_TX;
+ }
/* if type does not correspond, then discard */
if (type == X86_BR_NONE || (br_sel & type) != type) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 52441a2af53..9dd99751ccf 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -536,7 +536,7 @@ __snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *eve
if (!uncore_box_is_fake(box))
reg1->alloc |= alloc;
- return 0;
+ return NULL;
fail:
for (; i >= 0; i--) {
if (alloc & (0x1 << i))
@@ -644,7 +644,7 @@ snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
(!uncore_box_is_fake(box) && reg1->alloc))
return NULL;
again:
- mask = 0xff << (idx * 8);
+ mask = 0xffULL << (idx * 8);
raw_spin_lock_irqsave(&er->lock, flags);
if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
!((config1 ^ er->config) & mask)) {
@@ -1923,7 +1923,7 @@ static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modif
{
struct hw_perf_event *hwc = &event->hw;
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
+ u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
u64 config = reg1->config;
/* get the non-shared control bits and shift them */
@@ -2723,15 +2723,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per
static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
- struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
+ struct event_constraint *c;
int i, wmin, wmax, ret = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+ hwc = &box->event_list[i]->hw;
c = uncore_get_event_constraint(box, box->event_list[i]);
- constraints[i] = c;
+ hwc->constraint = c;
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
@@ -2739,7 +2740,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
/* fastpath, try to reuse previous register */
for (i = 0; i < n; i++) {
hwc = &box->event_list[i]->hw;
- c = constraints[i];
+ c = hwc->constraint;
/* never assigned */
if (hwc->idx == -1)
@@ -2759,7 +2760,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
}
/* slow path */
if (i != n)
- ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+ ret = perf_assign_events(box->event_list, n,
+ wmin, wmax, assign);
if (!assign || ret) {
for (i = 0; i < n; i++)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index f9528917f6e..47b3d00c9d8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -337,10 +337,10 @@
NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
-#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n)))
+#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
-#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (12 + 3 * (n)))
+#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
/*
* use the 9~13 bits to select event If the 7th bit is not set,
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 37a198bd48c..aee6317b902 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -37,8 +37,8 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
- c->hard_math ? "yes" : "no",
- c->hard_math ? "yes" : "no",
+ static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
+ static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
c->cpuid_level,
c->wp_works_ok ? "yes" : "no");
}
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault.c
index 155a13f33ed..5d3fe8d36e4 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault.c
@@ -9,6 +9,8 @@
#include <asm/processor.h>
#include <asm/desc.h>
+#ifdef CONFIG_X86_32
+
#define DOUBLEFAULT_STACKSIZE (1024)
static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
@@ -67,3 +69,16 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
.__cr3 = __pa_nodebug(swapper_pg_dir),
}
};
+
+/* dummy for do_double_fault() call */
+void df_debug(struct pt_regs *regs, long error_code) {}
+
+#else /* !CONFIG_X86_32 */
+
+void df_debug(struct pt_regs *regs, long error_code)
+{
+ pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
+ show_regs(regs);
+ panic("Machine halted.");
+}
+#endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 8f3e2dec1df..2cfbc3a3a2d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -801,7 +801,17 @@ ENTRY(name) \
CFI_ENDPROC; \
ENDPROC(name)
-#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
+
+#ifdef CONFIG_TRACING
+#define TRACE_BUILD_INTERRUPT(name, nr) \
+ BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
+#else
+#define TRACE_BUILD_INTERRUPT(name, nr)
+#endif
+
+#define BUILD_INTERRUPT(name, nr) \
+ BUILD_INTERRUPT3(name, nr, smp_##name); \
+ TRACE_BUILD_INTERRUPT(name, nr)
/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 72720894103..1b69951a81e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -365,7 +365,7 @@ ENDPROC(native_usergs_sysret64)
/*CFI_REL_OFFSET ss,0*/
pushq_cfi %rax /* rsp */
CFI_REL_OFFSET rsp,0
- pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
+ pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */
/*CFI_REL_OFFSET rflags,0*/
pushq_cfi $__KERNEL_CS /* cs */
/*CFI_REL_OFFSET cs,0*/
@@ -1138,7 +1138,7 @@ END(common_interrupt)
/*
* APIC interrupts.
*/
-.macro apicinterrupt num sym do_sym
+.macro apicinterrupt3 num sym do_sym
ENTRY(\sym)
INTR_FRAME
ASM_CLAC
@@ -1150,15 +1150,32 @@ ENTRY(\sym)
END(\sym)
.endm
+#ifdef CONFIG_TRACING
+#define trace(sym) trace_##sym
+#define smp_trace(sym) smp_trace_##sym
+
+.macro trace_apicinterrupt num sym
+apicinterrupt3 \num trace(\sym) smp_trace(\sym)
+.endm
+#else
+.macro trace_apicinterrupt num sym do_sym
+.endm
+#endif
+
+.macro apicinterrupt num sym do_sym
+apicinterrupt3 \num \sym \do_sym
+trace_apicinterrupt \num \sym
+.endm
+
#ifdef CONFIG_SMP
-apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
+apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
-apicinterrupt REBOOT_VECTOR \
+apicinterrupt3 REBOOT_VECTOR \
reboot_interrupt smp_reboot_interrupt
#endif
#ifdef CONFIG_X86_UV
-apicinterrupt UV_BAU_MESSAGE \
+apicinterrupt3 UV_BAU_MESSAGE \
uv_bau_message_intr1 uv_bau_message_interrupt
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
@@ -1167,14 +1184,19 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_HAVE_KVM
-apicinterrupt POSTED_INTR_VECTOR \
+apicinterrupt3 POSTED_INTR_VECTOR \
kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
apicinterrupt THRESHOLD_APIC_VECTOR \
threshold_interrupt smp_threshold_interrupt
+#endif
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt
+#endif
#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
@@ -1451,13 +1473,13 @@ ENTRY(xen_failsafe_callback)
CFI_ENDPROC
END(xen_failsafe_callback)
-apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
xen_hvm_callback_vector xen_evtchn_do_upcall
#endif /* CONFIG_XEN */
#if IS_ENABLED(CONFIG_HYPERV)
-apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
hyperv_callback_vector hyperv_vector_handler
#endif /* CONFIG_HYPERV */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 73afd11799c..e65ddc62e11 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -444,7 +444,6 @@ is486:
orl %ecx,%eax
movl %eax,%cr0
- call check_x87
lgdt early_gdt_descr
lidt idt_descr
ljmp $(__KERNEL_CS),$1f
@@ -467,26 +466,6 @@ is486:
pushl $0 # fake return address for unwinder
jmp *(initial_code)
-/*
- * We depend on ET to be correct. This checks for 287/387.
- */
-check_x87:
- movb $0,X86_HARD_MATH
- clts
- fninit
- fstsw %ax
- cmpb $0,%al
- je 1f
- movl %cr0,%eax /* no coprocessor: have to set bits */
- xorl $4,%eax /* set EM */
- movl %eax,%cr0
- ret
- ALIGN
-1: movb $1,X86_HARD_MATH
- .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
- ret
-
-
#include "verify_cpu.S"
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 321d65ebaff..5e4d8a8a5c4 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -518,9 +518,15 @@ ENTRY(idt_table)
.skip IDT_ENTRIES * 16
.align L1_CACHE_BYTES
-ENTRY(nmi_idt_table)
+ENTRY(debug_idt_table)
.skip IDT_ENTRIES * 16
+#ifdef CONFIG_TRACING
+ .align L1_CACHE_BYTES
+ENTRY(trace_idt_table)
+ .skip IDT_ENTRIES * 16
+#endif
+
__PAGE_ALIGNED_BSS
NEXT_PAGE(empty_zero_page)
.skip PAGE_SIZE
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index cb339097b9e..b627746f6b1 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -131,7 +131,7 @@ static void __cpuinit init_thread_xstate(void)
* xsave_init().
*/
- if (!HAVE_HWFP) {
+ if (!cpu_has_fpu) {
/*
* Disable xsave as we do not support it if i387
* emulation is enabled.
@@ -158,6 +158,14 @@ void __cpuinit fpu_init(void)
unsigned long cr0;
unsigned long cr4_mask = 0;
+#ifndef CONFIG_MATH_EMULATION
+ if (!cpu_has_fpu) {
+ pr_emerg("No FPU found and no math emulation present\n");
+ pr_emerg("Giving up\n");
+ for (;;)
+ asm volatile("hlt");
+ }
+#endif
if (cpu_has_fxsr)
cr4_mask |= X86_CR4_OSFXSR;
if (cpu_has_xmm)
@@ -167,7 +175,7 @@ void __cpuinit fpu_init(void)
cr0 = read_cr0();
cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
- if (!HAVE_HWFP)
+ if (!cpu_has_fpu)
cr0 |= X86_CR0_EM;
write_cr0(cr0);
@@ -185,7 +193,7 @@ void __cpuinit fpu_init(void)
void fpu_finit(struct fpu *fpu)
{
- if (!HAVE_HWFP) {
+ if (!cpu_has_fpu) {
finit_soft_fpu(&fpu->state->soft);
return;
}
@@ -214,7 +222,7 @@ int init_fpu(struct task_struct *tsk)
int ret;
if (tsk_used_math(tsk)) {
- if (HAVE_HWFP && tsk == current)
+ if (cpu_has_fpu && tsk == current)
unlazy_fpu(tsk);
tsk->thread.fpu.last_cpu = ~0;
return 0;
@@ -511,14 +519,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
- if (!HAVE_HWFP)
+ if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
- if (!cpu_has_fxsr) {
+ if (!cpu_has_fxsr)
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.state->fsave, 0,
-1);
- }
sanitize_i387_state(target);
@@ -545,13 +552,13 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
sanitize_i387_state(target);
- if (!HAVE_HWFP)
+ if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
- if (!cpu_has_fxsr) {
+ if (!cpu_has_fxsr)
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.state->fsave, 0, -1);
- }
+ &target->thread.fpu.state->fsave, 0,
+ -1);
if (pos > 0 || count < sizeof(env))
convert_from_fxsr(&env, target);
@@ -592,3 +599,33 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
EXPORT_SYMBOL(dump_fpu);
#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
+
+static int __init no_387(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_FPU);
+ return 1;
+}
+
+__setup("no387", no_387);
+
+void __cpuinit fpu_detect(struct cpuinfo_x86 *c)
+{
+ unsigned long cr0;
+ u16 fsw, fcw;
+
+ fsw = fcw = 0xffff;
+
+ cr0 = read_cr0();
+ cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
+ write_cr0(cr0);
+
+ asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+ : "+m" (fsw), "+m" (fcw));
+
+ if (fsw == 0 && (fcw & 0x103f) == 0x003f)
+ set_cpu_cap(c, X86_FEATURE_FPU);
+ else
+ clear_cpu_cap(c, X86_FEATURE_FPU);
+
+ /* The final cr0 value is set in fpu_init() */
+}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index ac0631d8996..3a8185c042a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -18,6 +18,9 @@
#include <asm/mce.h>
#include <asm/hw_irq.h>
+#define CREATE_TRACE_POINTS
+#include <asm/trace/irq_vectors.h>
+
atomic_t irq_err_count;
/* Function pointer for generic interrupt vector handling */
@@ -204,23 +207,21 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
/*
* Handler for X86_PLATFORM_IPI_VECTOR.
*/
-void smp_x86_platform_ipi(struct pt_regs *regs)
+void __smp_x86_platform_ipi(void)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
-
- ack_APIC_irq();
-
- irq_enter();
-
- exit_idle();
-
inc_irq_stat(x86_platform_ipis);
if (x86_platform_ipi_callback)
x86_platform_ipi_callback();
+}
- irq_exit();
+void smp_x86_platform_ipi(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ entering_ack_irq();
+ __smp_x86_platform_ipi();
+ exiting_irq();
set_irq_regs(old_regs);
}
@@ -246,6 +247,18 @@ void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
}
#endif
+void smp_trace_x86_platform_ipi(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ entering_ack_irq();
+ trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
+ __smp_x86_platform_ipi();
+ trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
+ exiting_irq();
+ set_irq_regs(old_regs);
+}
+
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index ca8f703a1e7..636a55e4a13 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -8,14 +8,34 @@
#include <linux/irq_work.h>
#include <linux/hardirq.h>
#include <asm/apic.h>
+#include <asm/trace/irq_vectors.h>
-void smp_irq_work_interrupt(struct pt_regs *regs)
+static inline void irq_work_entering_irq(void)
{
irq_enter();
ack_APIC_irq();
+}
+
+static inline void __smp_irq_work_interrupt(void)
+{
inc_irq_stat(apic_irq_work_irqs);
irq_work_run();
- irq_exit();
+}
+
+void smp_irq_work_interrupt(struct pt_regs *regs)
+{
+ irq_work_entering_irq();
+ __smp_irq_work_interrupt();
+ exiting_irq();
+}
+
+void smp_trace_irq_work_interrupt(struct pt_regs *regs)
+{
+ irq_work_entering_irq();
+ trace_irq_work_entry(IRQ_WORK_VECTOR);
+ __smp_irq_work_interrupt();
+ trace_irq_work_exit(IRQ_WORK_VECTOR);
+ exiting_irq();
}
void arch_irq_work_raise(void)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 9895a9a4138..211bce44552 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -365,10 +365,14 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src)
return insn.length;
}
-static void __kprobes arch_copy_kprobe(struct kprobe *p)
+static int __kprobes arch_copy_kprobe(struct kprobe *p)
{
+ int ret;
+
/* Copy an instruction with recovering if other optprobe modifies it.*/
- __copy_instruction(p->ainsn.insn, p->addr);
+ ret = __copy_instruction(p->ainsn.insn, p->addr);
+ if (!ret)
+ return -EINVAL;
/*
* __copy_instruction can modify the displacement of the instruction,
@@ -384,6 +388,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
/* Also, displacement change doesn't affect the first byte */
p->opcode = p->ainsn.insn[0];
+
+ return 0;
}
int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -397,8 +403,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
return -ENOMEM;
- arch_copy_kprobe(p);
- return 0;
+
+ return arch_copy_kprobe(p);
}
void __kprobes arch_arm_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d2c381280e3..3dd37ebd591 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -242,6 +242,7 @@ void __init kvmclock_init(void)
if (!mem)
return;
hv_clock = __va(mem);
+ memset(hv_clock, 0, size);
if (kvm_register_clock("boot clock")) {
hv_clock = NULL;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index efdec7cd8e0..47ebb1dbfbc 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -31,48 +31,12 @@
#include <asm/microcode.h>
#include <asm/processor.h>
#include <asm/msr.h>
+#include <asm/microcode_amd.h>
MODULE_DESCRIPTION("AMD Microcode Update Driver");
MODULE_AUTHOR("Peter Oruba");
MODULE_LICENSE("GPL v2");
-#define UCODE_MAGIC 0x00414d44
-#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
-#define UCODE_UCODE_TYPE 0x00000001
-
-struct equiv_cpu_entry {
- u32 installed_cpu;
- u32 fixed_errata_mask;
- u32 fixed_errata_compare;
- u16 equiv_cpu;
- u16 res;
-} __attribute__((packed));
-
-struct microcode_header_amd {
- u32 data_code;
- u32 patch_id;
- u16 mc_patch_data_id;
- u8 mc_patch_data_len;
- u8 init_flag;
- u32 mc_patch_data_checksum;
- u32 nb_dev_id;
- u32 sb_dev_id;
- u16 processor_rev_id;
- u8 nb_rev_id;
- u8 sb_rev_id;
- u8 bios_api_rev;
- u8 reserved1[3];
- u32 match_reg[8];
-} __attribute__((packed));
-
-struct microcode_amd {
- struct microcode_header_amd hdr;
- unsigned int mpb[0];
-};
-
-#define SECTION_HDR_SIZE 8
-#define CONTAINER_HDR_SZ 12
-
static struct equiv_cpu_entry *equiv_cpu_table;
struct ucode_patch {
@@ -84,21 +48,10 @@ struct ucode_patch {
static LIST_HEAD(pcache);
-static u16 find_equiv_id(unsigned int cpu)
+static u16 __find_equiv_id(unsigned int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- int i = 0;
-
- if (!equiv_cpu_table)
- return 0;
-
- while (equiv_cpu_table[i].installed_cpu != 0) {
- if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
- return equiv_cpu_table[i].equiv_cpu;
-
- i++;
- }
- return 0;
+ return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig);
}
static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
@@ -163,7 +116,7 @@ static struct ucode_patch *find_patch(unsigned int cpu)
{
u16 equiv_id;
- equiv_id = find_equiv_id(cpu);
+ equiv_id = __find_equiv_id(cpu);
if (!equiv_id)
return NULL;
@@ -173,9 +126,20 @@ static struct ucode_patch *find_patch(unsigned int cpu)
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct ucode_patch *p;
csig->sig = cpuid_eax(0x00000001);
csig->rev = c->microcode;
+
+ /*
+ * a patch could have been loaded early, set uci->mc so that
+ * mc_bp_resume() can call apply_microcode()
+ */
+ p = find_patch(cpu);
+ if (p && (p->patch_id == csig->rev))
+ uci->mc = p->data;
+
pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
return 0;
@@ -215,7 +179,21 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size,
return patch_size;
}
-static int apply_microcode_amd(int cpu)
+int __apply_microcode_amd(struct microcode_amd *mc_amd)
+{
+ u32 rev, dummy;
+
+ wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
+
+ /* verify patch application was successful */
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+ if (rev != mc_amd->hdr.patch_id)
+ return -1;
+
+ return 0;
+}
+
+int apply_microcode_amd(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_amd *mc_amd;
@@ -242,19 +220,15 @@ static int apply_microcode_amd(int cpu)
return 0;
}
- wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
-
- /* verify patch application was successful */
- rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
- if (rev != mc_amd->hdr.patch_id) {
+ if (__apply_microcode_amd(mc_amd))
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
- cpu, mc_amd->hdr.patch_id);
- return -1;
- }
+ cpu, mc_amd->hdr.patch_id);
+ else
+ pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
+ mc_amd->hdr.patch_id);
- pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
- uci->cpu_sig.rev = rev;
- c->microcode = rev;
+ uci->cpu_sig.rev = mc_amd->hdr.patch_id;
+ c->microcode = mc_amd->hdr.patch_id;
return 0;
}
@@ -364,7 +338,7 @@ static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
return crnt_size;
}
-static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
+static enum ucode_state __load_microcode_amd(int cpu, const u8 *data, size_t size)
{
enum ucode_state ret = UCODE_ERROR;
unsigned int leftover;
@@ -398,6 +372,32 @@ static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
return UCODE_OK;
}
+enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
+{
+ enum ucode_state ret;
+
+ /* free old equiv table */
+ free_equiv_cpu_table();
+
+ ret = __load_microcode_amd(cpu, data, size);
+
+ if (ret != UCODE_OK)
+ cleanup();
+
+#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32)
+ /* save BSP's matching patch for early load */
+ if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
+ struct ucode_patch *p = find_patch(cpu);
+ if (p) {
+ memset(amd_bsp_mpb, 0, MPB_MAX_SIZE);
+ memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data),
+ MPB_MAX_SIZE));
+ }
+ }
+#endif
+ return ret;
+}
+
/*
* AMD microcode firmware naming convention, up to family 15h they are in
* the legacy file:
@@ -440,12 +440,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
goto fw_release;
}
- /* free old equiv table */
- free_equiv_cpu_table();
-
ret = load_microcode_amd(cpu, fw->data, fw->size);
- if (ret != UCODE_OK)
- cleanup();
fw_release:
release_firmware(fw);
diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/microcode_amd_early.c
new file mode 100644
index 00000000000..1ac6e9aee76
--- /dev/null
+++ b/arch/x86/kernel/microcode_amd_early.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <jacob.shin@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/earlycpio.h>
+#include <linux/initrd.h>
+
+#include <asm/cpu.h>
+#include <asm/setup.h>
+#include <asm/microcode_amd.h>
+
+static bool ucode_loaded;
+static u32 ucode_new_rev;
+static unsigned long ucode_offset;
+static size_t ucode_size;
+
+/*
+ * Microcode patch container file is prepended to the initrd in cpio format.
+ * See Documentation/x86/early-microcode.txt
+ */
+static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin";
+
+static struct cpio_data __init find_ucode_in_initrd(void)
+{
+ long offset = 0;
+ char *path;
+ void *start;
+ size_t size;
+ unsigned long *uoffset;
+ size_t *usize;
+ struct cpio_data cd;
+
+#ifdef CONFIG_X86_32
+ struct boot_params *p;
+
+ /*
+ * On 32-bit, early load occurs before paging is turned on so we need
+ * to use physical addresses.
+ */
+ p = (struct boot_params *)__pa_nodebug(&boot_params);
+ path = (char *)__pa_nodebug(ucode_path);
+ start = (void *)p->hdr.ramdisk_image;
+ size = p->hdr.ramdisk_size;
+ uoffset = (unsigned long *)__pa_nodebug(&ucode_offset);
+ usize = (size_t *)__pa_nodebug(&ucode_size);
+#else
+ path = ucode_path;
+ start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
+ size = boot_params.hdr.ramdisk_size;
+ uoffset = &ucode_offset;
+ usize = &ucode_size;
+#endif
+
+ cd = find_cpio_data(path, start, size, &offset);
+ if (!cd.data)
+ return cd;
+
+ if (*(u32 *)cd.data != UCODE_MAGIC) {
+ cd.data = NULL;
+ cd.size = 0;
+ return cd;
+ }
+
+ *uoffset = (u8 *)cd.data - (u8 *)start;
+ *usize = cd.size;
+
+ return cd;
+}
+
+/*
+ * Early load occurs before we can vmalloc(). So we look for the microcode
+ * patch container file in initrd, traverse equivalent cpu table, look for a
+ * matching microcode patch, and update, all in initrd memory in place.
+ * When vmalloc() is available for use later -- on 64-bit during first AP load,
+ * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
+ * load_microcode_amd() to save equivalent cpu table and microcode patches in
+ * kernel heap memory.
+ */
+static void __cpuinit apply_ucode_in_initrd(void *ucode, size_t size)
+{
+ struct equiv_cpu_entry *eq;
+ u32 *header;
+ u8 *data;
+ u16 eq_id = 0;
+ int offset, left;
+ u32 rev, eax;
+ u32 *new_rev;
+ unsigned long *uoffset;
+ size_t *usize;
+
+#ifdef CONFIG_X86_32
+ new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
+ uoffset = (unsigned long *)__pa_nodebug(&ucode_offset);
+ usize = (size_t *)__pa_nodebug(&ucode_size);
+#else
+ new_rev = &ucode_new_rev;
+ uoffset = &ucode_offset;
+ usize = &ucode_size;
+#endif
+
+ data = ucode;
+ left = size;
+ header = (u32 *)data;
+
+ /* find equiv cpu table */
+
+ if (header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
+ header[2] == 0) /* size */
+ return;
+
+ eax = cpuid_eax(0x00000001);
+
+ while (left > 0) {
+ eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
+
+ offset = header[2] + CONTAINER_HDR_SZ;
+ data += offset;
+ left -= offset;
+
+ eq_id = find_equiv_id(eq, eax);
+ if (eq_id)
+ break;
+
+ /*
+ * support multiple container files appended together. if this
+ * one does not have a matching equivalent cpu entry, we fast
+ * forward to the next container file.
+ */
+ while (left > 0) {
+ header = (u32 *)data;
+ if (header[0] == UCODE_MAGIC &&
+ header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
+ break;
+
+ offset = header[1] + SECTION_HDR_SIZE;
+ data += offset;
+ left -= offset;
+ }
+
+ /* mark where the next microcode container file starts */
+ offset = data - (u8 *)ucode;
+ *uoffset += offset;
+ *usize -= offset;
+ ucode = data;
+ }
+
+ if (!eq_id) {
+ *usize = 0;
+ return;
+ }
+
+ /* find ucode and update if needed */
+
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
+
+ while (left > 0) {
+ struct microcode_amd *mc;
+
+ header = (u32 *)data;
+ if (header[0] != UCODE_UCODE_TYPE || /* type */
+ header[1] == 0) /* size */
+ break;
+
+ mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
+ if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id)
+ if (__apply_microcode_amd(mc) == 0) {
+ rev = mc->hdr.patch_id;
+ *new_rev = rev;
+ }
+
+ offset = header[1] + SECTION_HDR_SIZE;
+ data += offset;
+ left -= offset;
+ }
+
+ /* mark where this microcode container file ends */
+ offset = *usize - (data - (u8 *)ucode);
+ *usize -= offset;
+
+ if (!(*new_rev))
+ *usize = 0;
+}
+
+void __init load_ucode_amd_bsp(void)
+{
+ struct cpio_data cd = find_ucode_in_initrd();
+ if (!cd.data)
+ return;
+
+ apply_ucode_in_initrd(cd.data, cd.size);
+}
+
+#ifdef CONFIG_X86_32
+u8 amd_bsp_mpb[MPB_MAX_SIZE];
+
+/*
+ * On 32-bit, since AP's early load occurs before paging is turned on, we
+ * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
+ * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
+ * save_microcode_in_initrd_amd() BSP's patch is copied to amd_bsp_mpb, which
+ * is used upon resume from suspend.
+ */
+void __cpuinit load_ucode_amd_ap(void)
+{
+ struct microcode_amd *mc;
+ unsigned long *initrd;
+ unsigned long *uoffset;
+ size_t *usize;
+ void *ucode;
+
+ mc = (struct microcode_amd *)__pa(amd_bsp_mpb);
+ if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
+ __apply_microcode_amd(mc);
+ return;
+ }
+
+ initrd = (unsigned long *)__pa(&initrd_start);
+ uoffset = (unsigned long *)__pa(&ucode_offset);
+ usize = (size_t *)__pa(&ucode_size);
+
+ if (!*usize || !*initrd)
+ return;
+
+ ucode = (void *)((unsigned long)__pa(*initrd) + *uoffset);
+ apply_ucode_in_initrd(ucode, *usize);
+}
+
+static void __init collect_cpu_sig_on_bsp(void *arg)
+{
+ unsigned int cpu = smp_processor_id();
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ uci->cpu_sig.sig = cpuid_eax(0x00000001);
+}
+#else
+static void __cpuinit collect_cpu_info_amd_early(struct cpuinfo_x86 *c,
+ struct ucode_cpu_info *uci)
+{
+ u32 rev, eax;
+
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
+ eax = cpuid_eax(0x00000001);
+
+ uci->cpu_sig.sig = eax;
+ uci->cpu_sig.rev = rev;
+ c->microcode = rev;
+ c->x86 = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+}
+
+void __cpuinit load_ucode_amd_ap(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ collect_cpu_info_amd_early(&cpu_data(cpu), ucode_cpu_info + cpu);
+
+ if (cpu && !ucode_loaded) {
+ void *ucode;
+
+ if (!ucode_size || !initrd_start)
+ return;
+
+ ucode = (void *)(initrd_start + ucode_offset);
+ if (load_microcode_amd(0, ucode, ucode_size) != UCODE_OK)
+ return;
+ ucode_loaded = true;
+ }
+
+ apply_microcode_amd(cpu);
+}
+#endif
+
+int __init save_microcode_in_initrd_amd(void)
+{
+ enum ucode_state ret;
+ void *ucode;
+#ifdef CONFIG_X86_32
+ unsigned int bsp = boot_cpu_data.cpu_index;
+ struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
+
+ if (!uci->cpu_sig.sig)
+ smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
+#endif
+ if (ucode_new_rev)
+ pr_info("microcode: updated early to new patch_level=0x%08x\n",
+ ucode_new_rev);
+
+ if (ucode_loaded || !ucode_size || !initrd_start)
+ return 0;
+
+ ucode = (void *)(initrd_start + ucode_offset);
+ ret = load_microcode_amd(0, ucode, ucode_size);
+ if (ret != UCODE_OK)
+ return -EINVAL;
+
+ ucode_loaded = true;
+ return 0;
+}
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
index 833d51d6ee0..86119f63db0 100644
--- a/arch/x86/kernel/microcode_core_early.c
+++ b/arch/x86/kernel/microcode_core_early.c
@@ -18,6 +18,7 @@
*/
#include <linux/module.h>
#include <asm/microcode_intel.h>
+#include <asm/microcode_amd.h>
#include <asm/processor.h>
#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
@@ -81,8 +82,18 @@ void __init load_ucode_bsp(void)
vendor = x86_vendor();
x86 = x86_family();
- if (vendor == X86_VENDOR_INTEL && x86 >= 6)
- load_ucode_intel_bsp();
+ switch (vendor) {
+ case X86_VENDOR_INTEL:
+ if (x86 >= 6)
+ load_ucode_intel_bsp();
+ break;
+ case X86_VENDOR_AMD:
+ if (x86 >= 0x10)
+ load_ucode_amd_bsp();
+ break;
+ default:
+ break;
+ }
}
void __cpuinit load_ucode_ap(void)
@@ -95,6 +106,36 @@ void __cpuinit load_ucode_ap(void)
vendor = x86_vendor();
x86 = x86_family();
- if (vendor == X86_VENDOR_INTEL && x86 >= 6)
- load_ucode_intel_ap();
+ switch (vendor) {
+ case X86_VENDOR_INTEL:
+ if (x86 >= 6)
+ load_ucode_intel_ap();
+ break;
+ case X86_VENDOR_AMD:
+ if (x86 >= 0x10)
+ load_ucode_amd_ap();
+ break;
+ default:
+ break;
+ }
+}
+
+int __init save_microcode_in_initrd(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_INTEL:
+ if (c->x86 >= 6)
+ save_microcode_in_initrd_intel();
+ break;
+ case X86_VENDOR_AMD:
+ if (c->x86 >= 0x10)
+ save_microcode_in_initrd_amd();
+ break;
+ default:
+ break;
+ }
+
+ return 0;
}
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
index 2e9e12871c2..dabef95506f 100644
--- a/arch/x86/kernel/microcode_intel_early.c
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -529,7 +529,7 @@ int save_mc_for_early(u8 *mc)
*/
ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
if (ret) {
- pr_err("Can not save microcode patch.\n");
+ pr_err("Cannot save microcode patch.\n");
goto out;
}
@@ -699,7 +699,7 @@ static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data,
* This function converts microcode patch offsets previously stored in
* mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
*/
-int __init save_microcode_in_initrd(void)
+int __init save_microcode_in_initrd_intel(void)
{
unsigned int count = mc_saved_data.mc_saved_count;
struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
@@ -711,7 +711,7 @@ int __init save_microcode_in_initrd(void)
microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
ret = save_microcode(&mc_saved_data, mc_saved, count);
if (ret)
- pr_err("Can not save microcod patches from initrd");
+ pr_err("Cannot save microcode patches from initrd.\n");
show_saved_mc();
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 60308053fdb..0920212e615 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -14,6 +14,7 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/nmi.h>
+#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/slab.h>
@@ -29,6 +30,9 @@
#include <asm/nmi.h>
#include <asm/x86_init.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/nmi.h>
+
struct nmi_desc {
spinlock_t lock;
struct list_head head;
@@ -82,6 +86,15 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
#define nmi_to_desc(type) (&nmi_desc[type])
+static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;
+static int __init nmi_warning_debugfs(void)
+{
+ debugfs_create_u64("nmi_longest_ns", 0644,
+ arch_debugfs_dir, &nmi_longest_ns);
+ return 0;
+}
+fs_initcall(nmi_warning_debugfs);
+
static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
struct nmi_desc *desc = nmi_to_desc(type);
@@ -96,8 +109,27 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
* can be latched at any given time. Walk the whole list
* to handle those situations.
*/
- list_for_each_entry_rcu(a, &desc->head, list)
- handled += a->handler(type, regs);
+ list_for_each_entry_rcu(a, &desc->head, list) {
+ u64 before, delta, whole_msecs;
+ int decimal_msecs, thishandled;
+
+ before = local_clock();
+ thishandled = a->handler(type, regs);
+ handled += thishandled;
+ delta = local_clock() - before;
+ trace_nmi_handler(a->handler, (int)delta, thishandled);
+
+ if (delta < nmi_longest_ns)
+ continue;
+
+ nmi_longest_ns = delta;
+ whole_msecs = do_div(delta, (1000 * 1000));
+ decimal_msecs = do_div(delta, 1000) % 1000;
+ printk_ratelimited(KERN_INFO
+ "INFO: NMI handler (%ps) took too long to run: "
+ "%lld.%03d msecs\n", a->handler, whole_msecs,
+ decimal_msecs);
+ }
rcu_read_unlock();
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4e7a37ff03a..81a5f5e8f14 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -277,18 +277,6 @@ void exit_idle(void)
}
#endif
-void arch_cpu_idle_prepare(void)
-{
- /*
- * If we're the non-boot CPU, nothing set the stack canary up
- * for us. CPU0 already has it initialized but no harm in
- * doing it again. This is a good place for updating it, as
- * we wont ever return from this function (so the invalid
- * canaries already on the stack wont ever trigger).
- */
- boot_init_stack_canary();
-}
-
void arch_cpu_idle_enter(void)
{
local_touch_nmi();
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7305f7dfc7a..f8adefca71d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -110,11 +110,16 @@ void __show_regs(struct pt_regs *regs, int all)
get_debugreg(d1, 1);
get_debugreg(d2, 2);
get_debugreg(d3, 3);
- printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
- d0, d1, d2, d3);
-
get_debugreg(d6, 6);
get_debugreg(d7, 7);
+
+ /* Only print out debug registers if they are in their non-default state. */
+ if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
+ (d6 == DR6_RESERVED) && (d7 == 0x400))
+ return;
+
+ printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
+ d0, d1, d2, d3);
printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n",
d6, d7);
}
@@ -147,7 +152,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
childregs->bp = arg;
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
- childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 355ae06dbf9..05646bab4ca 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -105,11 +105,18 @@ void __show_regs(struct pt_regs *regs, int all)
get_debugreg(d0, 0);
get_debugreg(d1, 1);
get_debugreg(d2, 2);
- printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
get_debugreg(d3, 3);
get_debugreg(d6, 6);
get_debugreg(d7, 7);
+
+ /* Only print out debug registers if they are in their non-default state. */
+ if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
+ (d6 == DR6_RESERVED) && (d7 == 0x400))
+ return;
+
+ printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+
}
void release_thread(struct task_struct *dead_task)
@@ -176,7 +183,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
childregs->bp = arg;
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
- childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
return 0;
}
*childregs = *current_pt_regs();
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 36818f8ec2b..e13f8e7c22a 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -186,7 +186,7 @@ identity_mapped:
movl CP_PA_PGD(%ebx), %eax
movl %eax, %cr3
movl %cr0, %eax
- orl $(1<<31), %eax
+ orl $X86_CR0_PG, %eax
movl %eax, %cr0
lea PAGE_SIZE(%edi), %esp
movl %edi, %eax
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 7a6f3b3be3c..3fd2c693e47 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -151,21 +151,21 @@ identity_mapped:
testq %r11, %r11
jnz 1f
- xorq %rax, %rax
- xorq %rbx, %rbx
- xorq %rcx, %rcx
- xorq %rdx, %rdx
- xorq %rsi, %rsi
- xorq %rdi, %rdi
- xorq %rbp, %rbp
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r9
- xorq %r11, %r11
- xorq %r12, %r12
- xorq %r13, %r13
- xorq %r14, %r14
- xorq %r15, %r15
+ xorl %eax, %eax
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ xorl %r8d, %r8d
+ xorl %r9d, %r9d
+ xorl %r10d, %r10d
+ xorl %r11d, %r11d
+ xorl %r12d, %r12d
+ xorl %r13d, %r13d
+ xorl %r14d, %r14d
+ xorl %r15d, %r15d
ret
@@ -212,8 +212,8 @@ virtual_mapped:
/* Do the copies */
swap_pages:
movq %rdi, %rcx /* Put the page_list in %rcx */
- xorq %rdi, %rdi
- xorq %rsi, %rsi
+ xorl %edi, %edi
+ xorl %esi, %esi
jmp 1f
0: /* top, read another word for the indirection page */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 69562992e45..cf913587d4d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -43,12 +43,6 @@
#include <asm/sigframe.h>
-#ifdef CONFIG_X86_32
-# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
-#else
-# define FIX_EFLAGS __FIX_EFLAGS
-#endif
-
#define COPY(x) do { \
get_user_ex(regs->x, &sc->x); \
} while (0)
@@ -668,15 +662,17 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
if (!failed) {
/*
* Clear the direction flag as per the ABI for function entry.
- */
- regs->flags &= ~X86_EFLAGS_DF;
- /*
+ *
+ * Clear RF when entering the signal handler, because
+ * it might disable possible debug exception from the
+ * signal handler.
+ *
* Clear TF when entering the signal handler, but
* notify any tracer that was single-stepping it.
* The tracer may want to single-step inside the
* handler too.
*/
- regs->flags &= ~X86_EFLAGS_TF;
+ regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
}
signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 48d2b7ded42..f4fe0b8879e 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -30,6 +30,7 @@
#include <asm/proto.h>
#include <asm/apic.h>
#include <asm/nmi.h>
+#include <asm/trace/irq_vectors.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
*
@@ -249,32 +250,80 @@ finish:
/*
* Reschedule call back.
*/
-void smp_reschedule_interrupt(struct pt_regs *regs)
+static inline void __smp_reschedule_interrupt(void)
{
- ack_APIC_irq();
inc_irq_stat(irq_resched_count);
scheduler_ipi();
+}
+
+void smp_reschedule_interrupt(struct pt_regs *regs)
+{
+ ack_APIC_irq();
+ __smp_reschedule_interrupt();
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
}
-void smp_call_function_interrupt(struct pt_regs *regs)
+void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+{
+ ack_APIC_irq();
+ trace_reschedule_entry(RESCHEDULE_VECTOR);
+ __smp_reschedule_interrupt();
+ trace_reschedule_exit(RESCHEDULE_VECTOR);
+ /*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
+}
+
+static inline void call_function_entering_irq(void)
{
ack_APIC_irq();
irq_enter();
+}
+
+static inline void __smp_call_function_interrupt(void)
+{
generic_smp_call_function_interrupt();
inc_irq_stat(irq_call_count);
- irq_exit();
}
-void smp_call_function_single_interrupt(struct pt_regs *regs)
+void smp_call_function_interrupt(struct pt_regs *regs)
+{
+ call_function_entering_irq();
+ __smp_call_function_interrupt();
+ exiting_irq();
+}
+
+void smp_trace_call_function_interrupt(struct pt_regs *regs)
+{
+ call_function_entering_irq();
+ trace_call_function_entry(CALL_FUNCTION_VECTOR);
+ __smp_call_function_interrupt();
+ trace_call_function_exit(CALL_FUNCTION_VECTOR);
+ exiting_irq();
+}
+
+static inline void __smp_call_function_single_interrupt(void)
{
- ack_APIC_irq();
- irq_enter();
generic_smp_call_function_single_interrupt();
inc_irq_stat(irq_call_count);
- irq_exit();
+}
+
+void smp_call_function_single_interrupt(struct pt_regs *regs)
+{
+ call_function_entering_irq();
+ __smp_call_function_single_interrupt();
+ exiting_irq();
+}
+
+void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
+{
+ call_function_entering_irq();
+ trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
+ __smp_call_function_single_interrupt();
+ trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
+ exiting_irq();
}
static int __init nonmi_ipi_setup(char *str)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9c73b51817e..bfd348e9936 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -372,15 +372,15 @@ static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
void __cpuinit set_cpu_sibling_map(int cpu)
{
- bool has_mc = boot_cpu_data.x86_max_cores > 1;
bool has_smt = smp_num_siblings > 1;
+ bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
int i;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
- if (!has_smt && !has_mc) {
+ if (!has_mp) {
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
cpumask_set_cpu(cpu, cpu_core_mask(cpu));
@@ -394,7 +394,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
if ((i == cpu) || (has_smt && match_smt(c, o)))
link_mask(sibling, cpu, i);
- if ((i == cpu) || (has_mc && match_llc(c, o)))
+ if ((i == cpu) || (has_mp && match_llc(c, o)))
link_mask(llc_shared, cpu, i);
}
@@ -406,7 +406,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
o = &cpu_data(i);
- if ((i == cpu) || (has_mc && match_mc(c, o))) {
+ if ((i == cpu) || (has_mp && match_mc(c, o))) {
link_mask(core, cpu, i);
/*
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index f84fe00fad4..3ff42d2f046 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -31,6 +31,7 @@
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/tboot.h>
+#include <linux/debugfs.h>
#include <asm/realmode.h>
#include <asm/processor.h>
@@ -338,6 +339,73 @@ static struct notifier_block tboot_cpu_notifier __cpuinitdata =
.notifier_call = tboot_cpu_callback,
};
+#ifdef CONFIG_DEBUG_FS
+
+#define TBOOT_LOG_UUID { 0x26, 0x25, 0x19, 0xc0, 0x30, 0x6b, 0xb4, 0x4d, \
+ 0x4c, 0x84, 0xa3, 0xe9, 0x53, 0xb8, 0x81, 0x74 }
+
+#define TBOOT_SERIAL_LOG_ADDR 0x60000
+#define TBOOT_SERIAL_LOG_SIZE 0x08000
+#define LOG_MAX_SIZE_OFF 16
+#define LOG_BUF_OFF 24
+
+static uint8_t tboot_log_uuid[16] = TBOOT_LOG_UUID;
+
+static ssize_t tboot_log_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos)
+{
+ void __iomem *log_base;
+ u8 log_uuid[16];
+ u32 max_size;
+ void *kbuf;
+ int ret = -EFAULT;
+
+ log_base = ioremap_nocache(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE);
+ if (!log_base)
+ return ret;
+
+ memcpy_fromio(log_uuid, log_base, sizeof(log_uuid));
+ if (memcmp(&tboot_log_uuid, log_uuid, sizeof(log_uuid)))
+ goto err_iounmap;
+
+ max_size = readl(log_base + LOG_MAX_SIZE_OFF);
+ if (*ppos >= max_size) {
+ ret = 0;
+ goto err_iounmap;
+ }
+
+ if (*ppos + count > max_size)
+ count = max_size - *ppos;
+
+ kbuf = kmalloc(count, GFP_KERNEL);
+ if (!kbuf) {
+ ret = -ENOMEM;
+ goto err_iounmap;
+ }
+
+ memcpy_fromio(kbuf, log_base + LOG_BUF_OFF + *ppos, count);
+ if (copy_to_user(user_buf, kbuf, count))
+ goto err_kfree;
+
+ *ppos += count;
+
+ ret = count;
+
+err_kfree:
+ kfree(kbuf);
+
+err_iounmap:
+ iounmap(log_base);
+
+ return ret;
+}
+
+static const struct file_operations tboot_log_fops = {
+ .read = tboot_log_read,
+ .llseek = default_llseek,
+};
+
+#endif /* CONFIG_DEBUG_FS */
+
static __init int tboot_late_init(void)
{
if (!tboot_enabled())
@@ -348,6 +416,11 @@ static __init int tboot_late_init(void)
atomic_set(&ap_wfs_count, 0);
register_hotcpu_notifier(&tboot_cpu_notifier);
+#ifdef CONFIG_DEBUG_FS
+ debugfs_create_file("tboot_log", S_IRUSR,
+ arch_debugfs_dir, NULL, &tboot_log_fops);
+#endif
+
acpi_os_set_prepare_sleep(&tboot_sleep);
return 0;
}
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
new file mode 100644
index 00000000000..4e584a8d6ed
--- /dev/null
+++ b/arch/x86/kernel/tracepoint.c
@@ -0,0 +1,61 @@
+/*
+ * Code for supporting irq vector tracepoints.
+ *
+ * Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@hds.com>
+ *
+ */
+#include <asm/hw_irq.h>
+#include <asm/desc.h>
+#include <linux/atomic.h>
+
+atomic_t trace_idt_ctr = ATOMIC_INIT(0);
+struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
+ (unsigned long) trace_idt_table };
+
+#ifndef CONFIG_X86_64
+gate_desc trace_idt_table[NR_VECTORS] __page_aligned_data
+ = { { { { 0, 0 } } }, };
+#endif
+
+static int trace_irq_vector_refcount;
+static DEFINE_MUTEX(irq_vector_mutex);
+
+static void set_trace_idt_ctr(int val)
+{
+ atomic_set(&trace_idt_ctr, val);
+ /* Ensure the trace_idt_ctr is set before sending IPI */
+ wmb();
+}
+
+static void switch_idt(void *arg)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ load_current_idt();
+ local_irq_restore(flags);
+}
+
+void trace_irq_vector_regfunc(void)
+{
+ mutex_lock(&irq_vector_mutex);
+ if (!trace_irq_vector_refcount) {
+ set_trace_idt_ctr(1);
+ smp_call_function(switch_idt, NULL, 0);
+ switch_idt(NULL);
+ }
+ trace_irq_vector_refcount++;
+ mutex_unlock(&irq_vector_mutex);
+}
+
+void trace_irq_vector_unregfunc(void)
+{
+ mutex_lock(&irq_vector_mutex);
+ trace_irq_vector_refcount--;
+ if (!trace_irq_vector_refcount) {
+ set_trace_idt_ctr(0);
+ smp_call_function(switch_idt, NULL, 0);
+ switch_idt(NULL);
+ }
+ mutex_unlock(&irq_vector_mutex);
+}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 772e2a846de..b0865e88d3c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -254,6 +254,9 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_DF;
+#ifdef CONFIG_DOUBLEFAULT
+ df_debug(regs, error_code);
+#endif
/*
* This is always a kernel trap and never fixable (and thus must
* never return).
@@ -437,7 +440,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
- if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+ if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
SIGTRAP) == NOTIFY_STOP)
goto exit;
@@ -785,7 +788,7 @@ void __init trap_init(void)
x86_init.irqs.trap_init();
#ifdef CONFIG_X86_64
- memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
+ memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
set_nmi_gate(X86_TRAP_DB, &debug);
set_nmi_gate(X86_TRAP_BP, &int3);
#endif
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index ada87a329ed..d6c28acdf99 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -243,7 +243,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
if (!access_ok(VERIFY_WRITE, buf, size))
return -EACCES;
- if (!HAVE_HWFP)
+ if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_get(current, NULL, 0,
sizeof(struct user_i387_ia32_struct), NULL,
(struct _fpstate_ia32 __user *) buf) ? -1 : 1;
@@ -350,11 +350,10 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
if (!used_math() && init_fpu(tsk))
return -1;
- if (!HAVE_HWFP) {
+ if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
NULL, buf) != 0;
- }
if (use_xsave()) {
struct _fpx_sw_bytes fx_sw_user;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 260a9193955..b30f5a54a2a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7942,7 +7942,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
- vmx_set_rflags(vcpu, X86_EFLAGS_BIT1);
+ vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
/*
* Note that calling vmx_set_cr0 is important, even if cr0 hasn't
* actually changed, because it depends on the current state of
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 094b5d96ab1..292e6ca89f4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -582,8 +582,6 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
if (index != XCR_XFEATURE_ENABLED_MASK)
return 1;
xcr0 = xcr;
- if (kvm_x86_ops->get_cpl(vcpu) != 0)
- return 1;
if (!(xcr0 & XSTATE_FP))
return 1;
if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
@@ -597,7 +595,8 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
- if (__kvm_set_xcr(vcpu, index, xcr)) {
+ if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
+ __kvm_set_xcr(vcpu, index, xcr)) {
kvm_inject_gp(vcpu, 0);
return 1;
}
@@ -619,7 +618,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
return 1;
- if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_RDWRGSFS))
+ if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
return 1;
if (is_long_mode(vcpu)) {
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 7114c63f047..d482bcaf61c 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1410,7 +1410,7 @@ __init void lguest_init(void)
new_cpu_data.x86_capability[0] = cpuid_edx(1);
/* Math is always hard! */
- new_cpu_data.hard_math = 1;
+ set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
/* We don't have features. We have puppies! Puppies! */
#ifdef CONFIG_X86_MCE
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index ae1aa71d011..7e73e8c6909 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -16,169 +16,6 @@
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
-static unsigned long page_table_shareable(struct vm_area_struct *svma,
- struct vm_area_struct *vma,
- unsigned long addr, pgoff_t idx)
-{
- unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
- svma->vm_start;
- unsigned long sbase = saddr & PUD_MASK;
- unsigned long s_end = sbase + PUD_SIZE;
-
- /* Allow segments to share if only one is marked locked */
- unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
- unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
-
- /*
- * match the virtual addresses, permission and the alignment of the
- * page table page.
- */
- if (pmd_index(addr) != pmd_index(saddr) ||
- vm_flags != svm_flags ||
- sbase < svma->vm_start || svma->vm_end < s_end)
- return 0;
-
- return saddr;
-}
-
-static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
-{
- unsigned long base = addr & PUD_MASK;
- unsigned long end = base + PUD_SIZE;
-
- /*
- * check on proper vm_flags and page table alignment
- */
- if (vma->vm_flags & VM_MAYSHARE &&
- vma->vm_start <= base && end <= vma->vm_end)
- return 1;
- return 0;
-}
-
-/*
- * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
- * and returns the corresponding pte. While this is not necessary for the
- * !shared pmd case because we can allocate the pmd later as well, it makes the
- * code much cleaner. pmd allocation is essential for the shared case because
- * pud has to be populated inside the same i_mmap_mutex section - otherwise
- * racing tasks could either miss the sharing (see huge_pte_offset) or select a
- * bad pmd for sharing.
- */
-static pte_t *
-huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
-{
- struct vm_area_struct *vma = find_vma(mm, addr);
- struct address_space *mapping = vma->vm_file->f_mapping;
- pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
- vma->vm_pgoff;
- struct vm_area_struct *svma;
- unsigned long saddr;
- pte_t *spte = NULL;
- pte_t *pte;
-
- if (!vma_shareable(vma, addr))
- return (pte_t *)pmd_alloc(mm, pud, addr);
-
- mutex_lock(&mapping->i_mmap_mutex);
- vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
- if (svma == vma)
- continue;
-
- saddr = page_table_shareable(svma, vma, addr, idx);
- if (saddr) {
- spte = huge_pte_offset(svma->vm_mm, saddr);
- if (spte) {
- get_page(virt_to_page(spte));
- break;
- }
- }
- }
-
- if (!spte)
- goto out;
-
- spin_lock(&mm->page_table_lock);
- if (pud_none(*pud))
- pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
- else
- put_page(virt_to_page(spte));
- spin_unlock(&mm->page_table_lock);
-out:
- pte = (pte_t *)pmd_alloc(mm, pud, addr);
- mutex_unlock(&mapping->i_mmap_mutex);
- return pte;
-}
-
-/*
- * unmap huge page backed by shared pte.
- *
- * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
- * indicated by page_count > 1, unmap is achieved by clearing pud and
- * decrementing the ref count. If count == 1, the pte page is not shared.
- *
- * called with vma->vm_mm->page_table_lock held.
- *
- * returns: 1 successfully unmapped a shared pte page
- * 0 the underlying pte page is not shared, or it is the last user
- */
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
- pgd_t *pgd = pgd_offset(mm, *addr);
- pud_t *pud = pud_offset(pgd, *addr);
-
- BUG_ON(page_count(virt_to_page(ptep)) == 0);
- if (page_count(virt_to_page(ptep)) == 1)
- return 0;
-
- pud_clear(pud);
- put_page(virt_to_page(ptep));
- *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
- return 1;
-}
-
-pte_t *huge_pte_alloc(struct mm_struct *mm,
- unsigned long addr, unsigned long sz)
-{
- pgd_t *pgd;
- pud_t *pud;
- pte_t *pte = NULL;
-
- pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
- if (pud) {
- if (sz == PUD_SIZE) {
- pte = (pte_t *)pud;
- } else {
- BUG_ON(sz != PMD_SIZE);
- if (pud_none(*pud))
- pte = huge_pmd_share(mm, addr, pud);
- else
- pte = (pte_t *)pmd_alloc(mm, pud, addr);
- }
- }
- BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
-
- return pte;
-}
-
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd = NULL;
-
- pgd = pgd_offset(mm, addr);
- if (pgd_present(*pgd)) {
- pud = pud_offset(pgd, addr);
- if (pud_present(*pud)) {
- if (pud_large(*pud))
- return (pte_t *)pud;
- pmd = pmd_offset(pud, addr);
- }
- }
- return (pte_t *) pmd;
-}
-
#if 0 /* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
@@ -240,30 +77,6 @@ int pud_huge(pud_t pud)
return !!(pud_val(pud) & _PAGE_PSE);
}
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int write)
-{
- struct page *page;
-
- page = pte_page(*(pte_t *)pmd);
- if (page)
- page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
- return page;
-}
-
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int write)
-{
- struct page *page;
-
- page = pte_page(*(pte_t *)pud);
- if (page)
- page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
- return page;
-}
-
#endif
/* x86_64 also uses this file */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index eaac1743def..1f34e921977 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -277,6 +277,9 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range,
end_pfn = limit_pfn;
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+ if (!after_bootmem)
+ adjust_range_page_size_mask(mr, nr_range);
+
/* try to merge same page size and continuous */
for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
unsigned long old_start;
@@ -291,9 +294,6 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range,
nr_range--;
}
- if (!after_bootmem)
- adjust_range_page_size_mask(mr, nr_range);
-
for (i = 0; i < nr_range; i++)
printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
mr[i].start, mr[i].end - 1,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bb00c4672ad..b3940b6b4d7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -368,7 +368,7 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
*
* from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
*
- * phys_addr holds the negative offset to the kernel, which is added
+ * phys_base holds the negative offset to the kernel, which is added
* to the compile time generated pmds. This results in invalid pmds up
* to the point where we hit the physaddr 0 mapping.
*
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 9a1e6583910..0215e2c563e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -501,15 +501,15 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
}
if (slot < 0) {
- printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n",
- (u64)phys_addr, size);
+ printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n",
+ __func__, (u64)phys_addr, size);
WARN_ON(1);
return NULL;
}
if (early_ioremap_debug) {
- printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ",
- (u64)phys_addr, size, slot);
+ printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ",
+ __func__, (u64)phys_addr, size, slot);
dump_stack();
}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 82089d8b195..b410b71bdcf 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -42,7 +42,6 @@
#include <linux/io.h>
#include <linux/reboot.h>
#include <linux/bcd.h>
-#include <linux/ucs2_string.h>
#include <asm/setup.h>
#include <asm/efi.h>
@@ -54,12 +53,12 @@
#define EFI_DEBUG 1
-/*
- * There's some additional metadata associated with each
- * variable. Intel's reference implementation is 60 bytes - bump that
- * to account for potential alignment constraints
- */
-#define VAR_METADATA_SIZE 64
+#define EFI_MIN_RESERVE 5120
+
+#define EFI_DUMMY_GUID \
+ EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9)
+
+static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
struct efi __read_mostly efi = {
.mps = EFI_INVALID_TABLE_ADDR,
@@ -79,13 +78,6 @@ struct efi_memory_map memmap;
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
-static u64 efi_var_store_size;
-static u64 efi_var_remaining_size;
-static u64 efi_var_max_var_size;
-static u64 boot_used_size;
-static u64 boot_var_size;
-static u64 active_size;
-
unsigned long x86_efi_facility;
/*
@@ -188,53 +180,8 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
efi_char16_t *name,
efi_guid_t *vendor)
{
- efi_status_t status;
- static bool finished = false;
- static u64 var_size;
-
- status = efi_call_virt3(get_next_variable,
- name_size, name, vendor);
-
- if (status == EFI_NOT_FOUND) {
- finished = true;
- if (var_size < boot_used_size) {
- boot_var_size = boot_used_size - var_size;
- active_size += boot_var_size;
- } else {
- printk(KERN_WARNING FW_BUG "efi: Inconsistent initial sizes\n");
- }
- }
-
- if (boot_used_size && !finished) {
- unsigned long size = 0;
- u32 attr;
- efi_status_t s;
- void *tmp;
-
- s = virt_efi_get_variable(name, vendor, &attr, &size, NULL);
-
- if (s != EFI_BUFFER_TOO_SMALL || !size)
- return status;
-
- tmp = kmalloc(size, GFP_ATOMIC);
-
- if (!tmp)
- return status;
-
- s = virt_efi_get_variable(name, vendor, &attr, &size, tmp);
-
- if (s == EFI_SUCCESS && (attr & EFI_VARIABLE_NON_VOLATILE)) {
- var_size += size;
- var_size += ucs2_strsize(name, 1024);
- active_size += size;
- active_size += VAR_METADATA_SIZE;
- active_size += ucs2_strsize(name, 1024);
- }
-
- kfree(tmp);
- }
-
- return status;
+ return efi_call_virt3(get_next_variable,
+ name_size, name, vendor);
}
static efi_status_t virt_efi_set_variable(efi_char16_t *name,
@@ -243,34 +190,9 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
unsigned long data_size,
void *data)
{
- efi_status_t status;
- u32 orig_attr = 0;
- unsigned long orig_size = 0;
-
- status = virt_efi_get_variable(name, vendor, &orig_attr, &orig_size,
- NULL);
-
- if (status != EFI_BUFFER_TOO_SMALL)
- orig_size = 0;
-
- status = efi_call_virt5(set_variable,
- name, vendor, attr,
- data_size, data);
-
- if (status == EFI_SUCCESS) {
- if (orig_size) {
- active_size -= orig_size;
- active_size -= ucs2_strsize(name, 1024);
- active_size -= VAR_METADATA_SIZE;
- }
- if (data_size) {
- active_size += data_size;
- active_size += ucs2_strsize(name, 1024);
- active_size += VAR_METADATA_SIZE;
- }
- }
-
- return status;
+ return efi_call_virt5(set_variable,
+ name, vendor, attr,
+ data_size, data);
}
static efi_status_t virt_efi_query_variable_info(u32 attr,
@@ -786,9 +708,6 @@ void __init efi_init(void)
char vendor[100] = "unknown";
int i = 0;
void *tmp;
- struct setup_data *data;
- struct efi_var_bootdata *efi_var_data;
- u64 pa_data;
#ifdef CONFIG_X86_32
if (boot_params.efi_info.efi_systab_hi ||
@@ -806,22 +725,6 @@ void __init efi_init(void)
if (efi_systab_init(efi_phys.systab))
return;
- pa_data = boot_params.hdr.setup_data;
- while (pa_data) {
- data = early_ioremap(pa_data, sizeof(*efi_var_data));
- if (data->type == SETUP_EFI_VARS) {
- efi_var_data = (struct efi_var_bootdata *)data;
-
- efi_var_store_size = efi_var_data->store_size;
- efi_var_remaining_size = efi_var_data->remaining_size;
- efi_var_max_var_size = efi_var_data->max_var_size;
- }
- pa_data = data->next;
- early_iounmap(data, sizeof(*efi_var_data));
- }
-
- boot_used_size = efi_var_store_size - efi_var_remaining_size;
-
set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
/*
@@ -1026,6 +929,13 @@ void __init efi_enter_virtual_mode(void)
va = efi_ioremap(md->phys_addr, size,
md->type, md->attribute);
+ if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
+ if (!va)
+ pr_err("ioremap of 0x%llX failed!\n",
+ (unsigned long long)md->phys_addr);
+ continue;
+ }
+
md->virt_addr = (u64) (unsigned long) va;
if (!va) {
@@ -1085,6 +995,13 @@ void __init efi_enter_virtual_mode(void)
runtime_code_page_mkexec();
kfree(new_memmap);
+
+ /* clean DUMMY object */
+ efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+ EFI_VARIABLE_NON_VOLATILE |
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS,
+ 0, NULL);
}
/*
@@ -1136,33 +1053,70 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
efi_status_t status;
u64 storage_size, remaining_size, max_size;
+ if (!(attributes & EFI_VARIABLE_NON_VOLATILE))
+ return 0;
+
status = efi.query_variable_info(attributes, &storage_size,
&remaining_size, &max_size);
if (status != EFI_SUCCESS)
return status;
- if (!max_size && remaining_size > size)
- printk_once(KERN_ERR FW_BUG "Broken EFI implementation"
- " is returning MaxVariableSize=0\n");
/*
* Some firmware implementations refuse to boot if there's insufficient
* space in the variable store. We account for that by refusing the
* write if permitting it would reduce the available space to under
- * 50%. However, some firmware won't reclaim variable space until
- * after the used (not merely the actively used) space drops below
- * a threshold. We can approximate that case with the value calculated
- * above. If both the firmware and our calculations indicate that the
- * available space would drop below 50%, refuse the write.
+ * 5KB. This figure was provided by Samsung, so should be safe.
*/
+ if ((remaining_size - size < EFI_MIN_RESERVE) &&
+ !efi_no_storage_paranoia) {
+
+ /*
+ * Triggering garbage collection may require that the firmware
+ * generate a real EFI_OUT_OF_RESOURCES error. We can force
+ * that by attempting to use more space than is available.
+ */
+ unsigned long dummy_size = remaining_size + 1024;
+ void *dummy = kzalloc(dummy_size, GFP_ATOMIC);
+
+ if (!dummy)
+ return EFI_OUT_OF_RESOURCES;
+
+ status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+ EFI_VARIABLE_NON_VOLATILE |
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS,
+ dummy_size, dummy);
+
+ if (status == EFI_SUCCESS) {
+ /*
+ * This should have failed, so if it didn't make sure
+ * that we delete it...
+ */
+ efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+ EFI_VARIABLE_NON_VOLATILE |
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS,
+ 0, dummy);
+ }
+
+ kfree(dummy);
- if (!storage_size || size > remaining_size ||
- (max_size && size > max_size))
- return EFI_OUT_OF_RESOURCES;
+ /*
+ * The runtime code may now have triggered a garbage collection
+ * run, so check the variable info again
+ */
+ status = efi.query_variable_info(attributes, &storage_size,
+ &remaining_size, &max_size);
- if (!efi_no_storage_paranoia &&
- ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) &&
- (remaining_size - size < storage_size / 2)))
- return EFI_OUT_OF_RESOURCES;
+ if (status != EFI_SUCCESS)
+ return status;
+
+ /*
+ * There still isn't enough room, so return an error
+ */
+ if (remaining_size - size < EFI_MIN_RESERVE)
+ return EFI_OUT_OF_RESOURCES;
+ }
return EFI_SUCCESS;
}
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 590be109089..f7bab68a4b8 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -42,9 +42,6 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
"^(xen_irq_disable_direct_reloc$|"
"xen_save_fl_direct_reloc$|"
"VDSO|"
-#if ELF_BITS == 64
- "__vvar_page|"
-#endif
"__crc_)",
/*
@@ -72,6 +69,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
"__per_cpu_load|"
"init_per_cpu__.*|"
"__end_rodata_hpage_align|"
+ "__vvar_page|"
#endif
"_end)$"
};
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 0faad646f5f..d6bfb876cfb 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -372,7 +372,7 @@ subsys_initcall(sysenter_setup);
/* Register vsyscall32 into the ABI table */
#include <linux/sysctl.h>
-static ctl_table abi_table2[] = {
+static struct ctl_table abi_table2[] = {
{
.procname = "vsyscall32",
.data = &sysctl_vsyscall32,
@@ -383,7 +383,7 @@ static ctl_table abi_table2[] = {
{}
};
-static ctl_table abi_root_table2[] = {
+static struct ctl_table abi_root_table2[] = {
{
.procname = "abi",
.mode = 0555,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a492be2635a..2fa02bc5003 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1557,7 +1557,7 @@ asmlinkage void __init xen_start_kernel(void)
#ifdef CONFIG_X86_32
/* set up basic CPUID stuff */
cpu_detect(&new_cpu_data);
- new_cpu_data.hard_math = 1;
+ set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
new_cpu_data.wp_works_ok = 1;
new_cpu_data.x86_capability[0] = cpuid_edx(1);
#endif
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index fb44426fe93..d99cae8147d 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/irq_work.h>
+#include <linux/tick.h>
#include <asm/paravirt.h>
#include <asm/desc.h>
@@ -447,6 +448,13 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
cpu_bringup();
+ /*
+ * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
+ * clears certain data that the cpu_idle loop (which called us
+ * and that we return from) expects. The only way to get that
+ * data back is to call:
+ */
+ tick_nohz_idle_enter();
}
#else /* !CONFIG_HOTPLUG_CPU */