summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c140
1 files changed, 85 insertions, 55 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d004da1e35..39c28f09dfd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
static bool ignore_msrs = 0;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
+unsigned int min_timer_period_us = 500;
+module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
+
bool kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 kvm_max_guest_tsc_khz;
@@ -254,10 +257,26 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
-{
- /* TODO: reserve bits check */
- kvm_lapic_set_base(vcpu, data);
+int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+ u64 old_state = vcpu->arch.apic_base &
+ (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+ u64 new_state = msr_info->data &
+ (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+ u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
+ 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
+
+ if (!msr_info->host_initiated &&
+ ((msr_info->data & reserved_bits) != 0 ||
+ new_state == X2APIC_ENABLE ||
+ (new_state == MSR_IA32_APICBASE_ENABLE &&
+ old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
+ (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
+ old_state == 0)))
+ return 1;
+
+ kvm_lapic_set_base(vcpu, msr_info->data);
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
@@ -719,6 +738,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);
+static void kvm_update_dr6(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
+}
+
static void kvm_update_dr7(struct kvm_vcpu *vcpu)
{
unsigned long dr7;
@@ -747,6 +772,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
if (val & 0xffffffff00000000ULL)
return -1; /* #GP */
vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+ kvm_update_dr6(vcpu);
break;
case 5:
if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -788,7 +814,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
return 1;
/* fall through */
case 6:
- *val = vcpu->arch.dr6;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ *val = vcpu->arch.dr6;
+ else
+ *val = kvm_x86_ops->get_dr6(vcpu);
break;
case 5:
if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -836,11 +865,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
* kvm-specific. Those are put in the beginning of the list.
*/
-#define KVM_SAVE_MSRS_BEGIN 10
+#define KVM_SAVE_MSRS_BEGIN 12
static u32 msrs_to_save[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+ HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN,
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1275,8 +1305,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
kvm->arch.last_tsc_write = data;
kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
- /* Reset of TSC must disable overshoot protection below */
- vcpu->arch.hv_clock.tsc_timestamp = 0;
vcpu->arch.last_guest_tsc = data;
/* Keep track of which generation this VCPU has synchronized to */
@@ -1484,7 +1512,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
unsigned long flags, this_tsc_khz;
struct kvm_vcpu_arch *vcpu = &v->arch;
struct kvm_arch *ka = &v->kvm->arch;
- s64 kernel_ns, max_kernel_ns;
+ s64 kernel_ns;
u64 tsc_timestamp, host_tsc;
struct pvclock_vcpu_time_info guest_hv_clock;
u8 pvclock_flags;
@@ -1543,37 +1571,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
if (!vcpu->pv_time_enabled)
return 0;
- /*
- * Time as measured by the TSC may go backwards when resetting the base
- * tsc_timestamp. The reason for this is that the TSC resolution is
- * higher than the resolution of the other clock scales. Thus, many
- * possible measurments of the TSC correspond to one measurement of any
- * other clock, and so a spread of values is possible. This is not a
- * problem for the computation of the nanosecond clock; with TSC rates
- * around 1GHZ, there can only be a few cycles which correspond to one
- * nanosecond value, and any path through this code will inevitably
- * take longer than that. However, with the kernel_ns value itself,
- * the precision may be much lower, down to HZ granularity. If the
- * first sampling of TSC against kernel_ns ends in the low part of the
- * range, and the second in the high end of the range, we can get:
- *
- * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
- *
- * As the sampling errors potentially range in the thousands of cycles,
- * it is possible such a time value has already been observed by the
- * guest. To protect against this, we must compute the system time as
- * observed by the guest and ensure the new system time is greater.
- */
- max_kernel_ns = 0;
- if (vcpu->hv_clock.tsc_timestamp) {
- max_kernel_ns = vcpu->last_guest_tsc -
- vcpu->hv_clock.tsc_timestamp;
- max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
- vcpu->hv_clock.tsc_to_system_mul,
- vcpu->hv_clock.tsc_shift);
- max_kernel_ns += vcpu->last_kernel_ns;
- }
-
if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
&vcpu->hv_clock.tsc_shift,
@@ -1581,14 +1578,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hw_tsc_khz = this_tsc_khz;
}
- /* with a master <monotonic time, tsc value> tuple,
- * pvclock clock reads always increase at the (scaled) rate
- * of guest TSC - no need to deal with sampling errors.
- */
- if (!use_master_clock) {
- if (max_kernel_ns > kernel_ns)
- kernel_ns = max_kernel_ns;
- }
/* With all the info we got, fill in the values */
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
@@ -1826,6 +1815,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
switch (msr) {
case HV_X64_MSR_GUEST_OS_ID:
case HV_X64_MSR_HYPERCALL:
+ case HV_X64_MSR_REFERENCE_TSC:
+ case HV_X64_MSR_TIME_REF_COUNT:
r = true;
break;
}
@@ -1865,6 +1856,21 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
if (__copy_to_user((void __user *)addr, instructions, 4))
return 1;
kvm->arch.hv_hypercall = data;
+ mark_page_dirty(kvm, gfn);
+ break;
+ }
+ case HV_X64_MSR_REFERENCE_TSC: {
+ u64 gfn;
+ HV_REFERENCE_TSC_PAGE tsc_ref;
+ memset(&tsc_ref, 0, sizeof(tsc_ref));
+ kvm->arch.hv_tsc_page = data;
+ if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+ break;
+ gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+ if (kvm_write_guest(kvm, data,
+ &tsc_ref, sizeof(tsc_ref)))
+ return 1;
+ mark_page_dirty(kvm, gfn);
break;
}
default:
@@ -1879,19 +1885,21 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
switch (msr) {
case HV_X64_MSR_APIC_ASSIST_PAGE: {
+ u64 gfn;
unsigned long addr;
if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
vcpu->arch.hv_vapic = data;
break;
}
- addr = gfn_to_hva(vcpu->kvm, data >>
- HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
+ gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
+ addr = gfn_to_hva(vcpu->kvm, gfn);
if (kvm_is_error_hva(addr))
return 1;
if (__clear_user((void __user *)addr, PAGE_SIZE))
return 1;
vcpu->arch.hv_vapic = data;
+ mark_page_dirty(vcpu->kvm, gfn);
break;
}
case HV_X64_MSR_EOI:
@@ -2017,8 +2025,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case 0x200 ... 0x2ff:
return set_msr_mtrr(vcpu, msr, data);
case MSR_IA32_APICBASE:
- kvm_set_apic_base(vcpu, data);
- break;
+ return kvm_set_apic_base(vcpu, msr_info);
case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
return kvm_x2apic_msr_write(vcpu, msr, data);
case MSR_IA32_TSCDEADLINE:
@@ -2291,6 +2298,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_HYPERCALL:
data = kvm->arch.hv_hypercall;
break;
+ case HV_X64_MSR_TIME_REF_COUNT: {
+ data =
+ div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
+ break;
+ }
+ case HV_X64_MSR_REFERENCE_TSC:
+ data = kvm->arch.hv_tsc_page;
+ break;
default:
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
@@ -2601,6 +2616,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_READONLY_MEM:
+ case KVM_CAP_HYPERV_TIME:
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
case KVM_CAP_ASSIGN_DEV_IRQ:
case KVM_CAP_PCI_2_3:
@@ -2972,8 +2988,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
struct kvm_debugregs *dbgregs)
{
+ unsigned long val;
+
memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
- dbgregs->dr6 = vcpu->arch.dr6;
+ _kvm_get_dr(vcpu, 6, &val);
+ dbgregs->dr6 = val;
dbgregs->dr7 = vcpu->arch.dr7;
dbgregs->flags = 0;
memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
@@ -2987,7 +3006,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
vcpu->arch.dr6 = dbgregs->dr6;
+ kvm_update_dr6(vcpu);
vcpu->arch.dr7 = dbgregs->dr7;
+ kvm_update_dr7(vcpu);
return 0;
}
@@ -5834,6 +5855,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_apic_update_tmr(vcpu, tmr);
}
+/*
+ * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * exiting to the userspace. Otherwise, the value will be returned to the
+ * userspace.
+ */
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
@@ -6089,7 +6115,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
}
if (need_resched()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
- kvm_resched(vcpu);
+ cond_resched();
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
}
}
@@ -6401,6 +6427,7 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ struct msr_data apic_base_msr;
int mmu_reset_needed = 0;
int pending_vec, max_bits, idx;
struct desc_ptr dt;
@@ -6424,7 +6451,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
kvm_x86_ops->set_efer(vcpu, sregs->efer);
- kvm_set_apic_base(vcpu, sregs->apic_base);
+ apic_base_msr.data = sregs->apic_base;
+ apic_base_msr.host_initiated = true;
+ kvm_set_apic_base(vcpu, &apic_base_msr);
mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
@@ -6717,6 +6746,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
vcpu->arch.dr6 = DR6_FIXED_1;
+ kvm_update_dr6(vcpu);
vcpu->arch.dr7 = DR7_FIXED_1;
kvm_update_dr7(vcpu);