summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-01-02 11:41:11 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-02 11:41:11 -0800
commit597b0d21626da4e6f09f132442caf0cc2b0eb47c (patch)
tree13c0074bb20f7b05a471e78d4ff52c665a10266a /arch/x86/kernel
parent2640c9a90fa596871e142f42052608864335f102 (diff)
parent87917239204d67a316cb89751750f86c9ed3640b (diff)
Merge branch 'kvm-updates/2.6.29' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates/2.6.29' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (140 commits) KVM: MMU: handle large host sptes on invlpg/resync KVM: Add locking to virtual i8259 interrupt controller KVM: MMU: Don't treat a global pte as such if cr4.pge is cleared MAINTAINERS: Maintainership changes for kvm/ia64 KVM: ia64: Fix kvm_arch_vcpu_ioctl_[gs]et_regs() KVM: x86: Rework user space NMI injection as KVM_CAP_USER_NMI KVM: VMX: Fix pending NMI-vs.-IRQ race for user space irqchip KVM: fix handling of ACK from shared guest IRQ KVM: MMU: check for present pdptr shadow page in walk_shadow KVM: Consolidate userspace memory capability reporting into common code KVM: Advertise the bug in memory region destruction as fixed KVM: use cpumask_var_t for cpus_hardware_enabled KVM: use modern cpumask primitives, no cpumask_t on stack KVM: Extract core of kvm_flush_remote_tlbs/kvm_reload_remote_mmus KVM: set owner of cpu and vm file operations anon_inodes: use fops->owner for module refcount x86: KVM guest: kvm_get_tsc_khz: return khz, not lpj KVM: MMU: prepopulate the shadow on invlpg KVM: MMU: skip global pgtables on sync due to cr3 switch KVM: MMU: collapse remote TLB flushes on root sync ...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c12
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h18
-rw-r--r--arch/x86/kernel/crash.c18
-rw-r--r--arch/x86/kernel/kvmclock.c10
-rw-r--r--arch/x86/kernel/reboot.c62
6 files changed, 90 insertions, 34 deletions
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f01ee..b59ddcc88cd 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,14 +14,6 @@
#include <asm/pat.h>
#include "mtrr.h"
-struct mtrr_state {
- struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
- mtrr_type fixed_ranges[NUM_FIXED_RANGES];
- unsigned char enabled;
- unsigned char have_fixed;
- mtrr_type def_type;
-};
-
struct fixed_range_block {
int base_msr; /* start address of an MTRR block */
int ranges; /* number of MTRRs in this block */
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = {
};
static unsigned long smp_changes_mask;
-static struct mtrr_state mtrr_state = {};
static int mtrr_state_set;
u64 mtrr_tom2;
+struct mtrr_state_type mtrr_state = {};
+EXPORT_SYMBOL_GPL(mtrr_state);
+
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "mtrr."
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1159e269e59..d6ec7ec3027 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
u32 num_var_ranges = 0;
-unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@ struct mtrr_value {
unsigned long lsize;
};
-static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
+static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
{
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2dc4ec656b2..ffd60409cc6 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,11 +8,6 @@
#define MTRRcap_MSR 0x0fe
#define MTRRdefType_MSR 0x2ff
-#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
-#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
-
-#define NUM_FIXED_RANGES 88
-#define MAX_VAR_RANGES 256
#define MTRRfix64K_00000_MSR 0x250
#define MTRRfix16K_80000_MSR 0x258
#define MTRRfix16K_A0000_MSR 0x259
@@ -29,11 +24,7 @@
#define MTRR_CHANGE_MASK_VARIABLE 0x02
#define MTRR_CHANGE_MASK_DEFTYPE 0x04
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
- an 8 bit field: */
-typedef u8 mtrr_type;
-
-extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
struct mtrr_ops {
u32 vendor;
@@ -70,13 +61,6 @@ struct set_mtrr_context {
u32 ccr3;
};
-struct mtrr_var_range {
- u32 base_lo;
- u32 base_hi;
- u32 mask_lo;
- u32 mask_hi;
-};
-
void set_mtrr_done(struct set_mtrr_context *ctxt);
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index d84a852e4cd..c689d19e35a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
#include <linux/kdebug.h>
#include <asm/smp.h>
#include <asm/reboot.h>
+#include <asm/virtext.h>
#include <mach_ipi.h>
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
#endif
crash_save_cpu(regs, cpu);
+ /* Disable VMX or SVM if needed.
+ *
+ * We need to disable virtualization on all CPUs.
+ * Having VMX or SVM enabled on any CPU may break rebooting
+ * after the kdump kernel has finished its task.
+ */
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+
disable_local_APIC();
}
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
local_irq_disable();
kdump_nmi_shootdown_cpus();
+
+ /* Booting kdump kernel with VMX or SVM enabled won't work,
+ * because (among other limitations) we can't disable paging
+ * with the virt flags.
+ */
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+
lapic_shutdown();
#if defined(CONFIG_X86_IO_APIC)
disable_IO_APIC();
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e169ae9b6a6..652fce6d2cc 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
*/
static unsigned long kvm_get_tsc_khz(void)
{
- return preset_lpj;
+ struct pvclock_vcpu_time_info *src;
+ src = &per_cpu(hv_clock, 0);
+ return pvclock_tsc_khz(src);
}
static void kvm_get_preset_lpj(void)
{
- struct pvclock_vcpu_time_info *src;
unsigned long khz;
u64 lpj;
- src = &per_cpu(hv_clock, 0);
- khz = pvclock_tsc_khz(src);
+ khz = kvm_get_tsc_khz();
lpj = ((u64)khz * 1000);
do_div(lpj, HZ);
@@ -194,5 +194,7 @@ void __init kvmclock_init(void)
#endif
kvm_get_preset_lpj();
clocksource_register(&kvm_clock);
+ pv_info.paravirt_enabled = 1;
+ pv_info.name = "KVM";
}
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718df6ee..72e0e4e712d 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
#include <asm/proto.h>
#include <asm/reboot_fixups.h>
#include <asm/reboot.h>
+#include <asm/virtext.h>
#ifdef CONFIG_X86_32
# include <linux/dmi.h>
@@ -39,6 +40,12 @@ int reboot_force;
static int reboot_cpu = -1;
#endif
+/* This is set if we need to go through the 'emergency' path.
+ * When machine_emergency_restart() is called, we may be on
+ * an inconsistent state and won't be able to do a clean cleanup
+ */
+static int reboot_emergency;
+
/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
bool port_cf9_safe = false;
@@ -368,6 +375,48 @@ static inline void kb_wait(void)
}
}
+static void vmxoff_nmi(int cpu, struct die_args *args)
+{
+ cpu_emergency_vmxoff();
+}
+
+/* Use NMIs as IPIs to tell all CPUs to disable virtualization
+ */
+static void emergency_vmx_disable_all(void)
+{
+ /* Just make sure we won't change CPUs while doing this */
+ local_irq_disable();
+
+ /* We need to disable VMX on all CPUs before rebooting, otherwise
+ * we risk hanging up the machine, because the CPU ignore INIT
+ * signals when VMX is enabled.
+ *
+ * We can't take any locks and we may be on an inconsistent
+ * state, so we use NMIs as IPIs to tell the other CPUs to disable
+ * VMX and halt.
+ *
+ * For safety, we will avoid running the nmi_shootdown_cpus()
+ * stuff unnecessarily, but we don't have a way to check
+ * if other CPUs have VMX enabled. So we will call it only if the
+ * CPU we are running on has VMX enabled.
+ *
+ * We will miss cases where VMX is not enabled on all CPUs. This
+ * shouldn't do much harm because KVM always enable VMX on all
+ * CPUs anyway. But we can miss it on the small window where KVM
+ * is still enabling VMX.
+ */
+ if (cpu_has_vmx() && cpu_vmx_enabled()) {
+ /* Disable VMX on this CPU.
+ */
+ cpu_vmxoff();
+
+ /* Halt and disable VMX on the other CPUs */
+ nmi_shootdown_cpus(vmxoff_nmi);
+
+ }
+}
+
+
void __attribute__((weak)) mach_reboot_fixups(void)
{
}
@@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void)
{
int i;
+ if (reboot_emergency)
+ emergency_vmx_disable_all();
+
/* Tell the BIOS if we want cold or warm reboot */
*((unsigned short *)__va(0x472)) = reboot_mode;
@@ -482,13 +534,19 @@ void native_machine_shutdown(void)
#endif
}
+static void __machine_emergency_restart(int emergency)
+{
+ reboot_emergency = emergency;
+ machine_ops.emergency_restart();
+}
+
static void native_machine_restart(char *__unused)
{
printk("machine restart\n");
if (!reboot_force)
machine_shutdown();
- machine_emergency_restart();
+ __machine_emergency_restart(0);
}
static void native_machine_halt(void)
@@ -532,7 +590,7 @@ void machine_shutdown(void)
void machine_emergency_restart(void)
{
- machine_ops.emergency_restart();
+ __machine_emergency_restart(1);
}
void machine_restart(char *cmd)