From b3ae2096974b12c3af2ad1a4e7716b084949867f Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 Jul 2012 17:56:33 +0900 Subject: KVM: Introduce kvm_unmap_hva_range() for kvm_mmu_notifier_invalidate_range_start() When we tested KVM under memory pressure, with THP enabled on the host, we noticed that MMU notifier took a long time to invalidate huge pages. Since the invalidation was done with mmu_lock held, it not only wasted the CPU but also made the host harder to respond. This patch mitigates this by using kvm_handle_hva_range(). Signed-off-by: Takuya Yoshikawa Cc: Alexander Graf Cc: Paul Mackerras Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b3ce91c623e..e2b1a159e5d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -332,8 +332,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, * count is also read inside the mmu_lock critical section. */ kvm->mmu_notifier_count++; - for (; start < end; start += PAGE_SIZE) - need_tlb_flush |= kvm_unmap_hva(kvm, start); + need_tlb_flush = kvm_unmap_hva_range(kvm, start, end); need_tlb_flush |= kvm->tlbs_dirty; /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) -- cgit v1.2.3-70-g09d2 From 903816fa4d016e20ec71a1a97700cfcdda115580 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:54:11 +0800 Subject: KVM: using get_fault_pfn to get the fault pfn Using get_fault_pfn to cleanup the code Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 6 ++---- include/linux/kvm_host.h | 5 +---- virt/kvm/kvm_main.c | 13 ++++--------- 3 files changed, 7 insertions(+), 17 deletions(-) (limited to 'virt') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 685a4855738..f85cc21ae95 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2513,10 +2513,8 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, unsigned long hva; slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); - if (!slot) { - get_page(fault_page); - return page_to_pfn(fault_page); - } + if (!slot) + return get_fault_pfn(); hva = gfn_to_hva_memslot(slot, gfn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6f6c18a03c5..1a7f838d30c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -383,15 +383,11 @@ id_to_memslot(struct kvm_memslots *slots, int id) static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } extern struct page *bad_page; -extern struct page *fault_page; - extern pfn_t bad_pfn; -extern pfn_t fault_pfn; int is_error_page(struct page *page); int is_error_pfn(pfn_t pfn); int is_hwpoison_pfn(pfn_t pfn); -int is_fault_pfn(pfn_t pfn); int is_noslot_pfn(pfn_t pfn); int is_invalid_pfn(pfn_t pfn); int kvm_is_error_hva(unsigned long addr); @@ -441,6 +437,7 @@ void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); void kvm_set_pfn_accessed(pfn_t pfn); void kvm_get_pfn(pfn_t pfn); +pfn_t get_fault_pfn(void); int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e2b1a159e5d..0fbbf2d2160 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -103,8 +103,8 @@ static bool largepages_enabled = true; static struct page *hwpoison_page; static pfn_t hwpoison_pfn; -struct page *fault_page; -pfn_t fault_pfn; +static struct page *fault_page; +static pfn_t fault_pfn; inline int kvm_is_mmio_pfn(pfn_t pfn) { @@ -949,12 +949,6 @@ int is_hwpoison_pfn(pfn_t pfn) } EXPORT_SYMBOL_GPL(is_hwpoison_pfn); -int is_fault_pfn(pfn_t pfn) -{ - return pfn == fault_pfn; -} -EXPORT_SYMBOL_GPL(is_fault_pfn); - int is_noslot_pfn(pfn_t pfn) { return pfn == bad_pfn; @@ -1038,11 +1032,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_hva); -static pfn_t get_fault_pfn(void) +pfn_t get_fault_pfn(void) { get_page(fault_page); return fault_pfn; } +EXPORT_SYMBOL_GPL(get_fault_pfn); int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) -- cgit v1.2.3-70-g09d2 From ca0565f5736e67af3172d188577b57e303dd082a Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:54:52 +0800 Subject: KVM: make bad_pfn static to kvm_main.c bad_pfn is not used out of kvm_main.c, so mark it static, also move it near hwpoison_pfn and fault_pfn Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 1 - virt/kvm/kvm_main.c | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1a7f838d30c..5f956cde137 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -383,7 +383,6 @@ id_to_memslot(struct kvm_memslots *slots, int id) static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } extern struct page *bad_page; -extern pfn_t bad_pfn; int is_error_page(struct page *page); int is_error_pfn(pfn_t pfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0fbbf2d2160..f955eee92aa 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -100,6 +100,9 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; +struct page *bad_page; +static pfn_t bad_pfn; + static struct page *hwpoison_page; static pfn_t hwpoison_pfn; @@ -2691,9 +2694,6 @@ static struct syscore_ops kvm_syscore_ops = { .resume = kvm_resume, }; -struct page *bad_page; -pfn_t bad_pfn; - static inline struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) { -- cgit v1.2.3-70-g09d2 From d566104853361cc377c61f70e41c1ad3d44b86c6 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:56:16 +0800 Subject: KVM: remove the unused parameter of gfn_to_pfn_memslot The parameter, 'kvm', is not used in gfn_to_pfn_memslot, we can happily remove it Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/powerpc/kvm/e500_tlb.c | 2 +- arch/x86/kvm/mmu.c | 2 +- include/linux/kvm_host.h | 5 ++--- virt/kvm/iommu.c | 10 +++++----- virt/kvm/kvm_main.c | 15 +++++++-------- 5 files changed, 16 insertions(+), 18 deletions(-) (limited to 'virt') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index c510fc96130..c8f6c582674 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -520,7 +520,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (likely(!pfnmap)) { unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); - pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn); + pfn = gfn_to_pfn_memslot(slot, gfn); if (is_error_pfn(pfn)) { printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", (long)gfn); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f85cc21ae95..4f77f7ac6d2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2518,7 +2518,7 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, hva = gfn_to_hva_memslot(slot, gfn); - return hva_to_pfn_atomic(vcpu->kvm, hva); + return hva_to_pfn_atomic(hva); } static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e8d13a072d2..db9aa917840 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -418,15 +418,14 @@ void kvm_release_page_dirty(struct page *page); void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); -pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr); +pfn_t hva_to_pfn_atomic(unsigned long addr); pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async, bool write_fault, bool *writable); pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); -pfn_t gfn_to_pfn_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn); +pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_pfn_dirty(pfn_t); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index e9fff9830bf..c03f1fb2670 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); -static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, - gfn_t gfn, unsigned long size) +static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, + unsigned long size) { gfn_t end_gfn; pfn_t pfn; - pfn = gfn_to_pfn_memslot(kvm, slot, gfn); + pfn = gfn_to_pfn_memslot(slot, gfn); end_gfn = gfn + (size >> PAGE_SHIFT); gfn += 1; @@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, return pfn; while (gfn < end_gfn) - gfn_to_pfn_memslot(kvm, slot, gfn++); + gfn_to_pfn_memslot(slot, gfn++); return pfn; } @@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) * Pin all pages we are about to map in memory. This is * important because we unmap and unpin in 4kb steps later. */ - pfn = kvm_pin_pages(kvm, slot, gfn, page_size); + pfn = kvm_pin_pages(slot, gfn, page_size); if (is_error_pfn(pfn)) { gfn += 1; continue; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f955eee92aa..68dda513cd7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1062,8 +1062,8 @@ static inline int check_user_page_hwpoison(unsigned long addr) return rc == -EHWPOISON; } -static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, - bool *async, bool write_fault, bool *writable) +static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, + bool write_fault, bool *writable) { struct page *page[1]; int npages = 0; @@ -1143,9 +1143,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, return pfn; } -pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) +pfn_t hva_to_pfn_atomic(unsigned long addr) { - return hva_to_pfn(kvm, addr, true, NULL, true, NULL); + return hva_to_pfn(addr, true, NULL, true, NULL); } EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); @@ -1163,7 +1163,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, return page_to_pfn(bad_page); } - return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable); + return hva_to_pfn(addr, atomic, async, write_fault, writable); } pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) @@ -1192,11 +1192,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, } EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); -pfn_t gfn_to_pfn_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn) +pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { unsigned long addr = gfn_to_hva_memslot(slot, gfn); - return hva_to_pfn(kvm, addr, false, NULL, true, NULL); + return hva_to_pfn(addr, false, NULL, true, NULL); } int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, -- cgit v1.2.3-70-g09d2 From 93b6547e2219784b2df790353e083e0bdbebd2c2 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 19 Jul 2012 13:55:53 +0300 Subject: KVM: switch to symbolic name for irq_states size Use PIC_NUM_PINS instead of hard-coded 16 for pic pins. Signed-off-by: Michael S. Tsirkin Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/irq.h | 2 +- virt/kvm/irq_comm.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2086f2bfba3..2d03568e949 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -70,7 +70,7 @@ struct kvm_pic { struct kvm_io_device dev_slave; struct kvm_io_device dev_eclr; void (*ack_notifier)(void *opaque, int irq); - unsigned long irq_states[16]; + unsigned long irq_states[PIC_NUM_PINS]; }; struct kvm_pic *kvm_create_pic(struct kvm *kvm); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a6a0365475e..22aae8fd146 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -343,11 +343,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, switch (ue->u.irqchip.irqchip) { case KVM_IRQCHIP_PIC_MASTER: e->set = kvm_set_pic_irq; - max_pin = 16; + max_pin = PIC_NUM_PINS; break; case KVM_IRQCHIP_PIC_SLAVE: e->set = kvm_set_pic_irq; - max_pin = 16; + max_pin = PIC_NUM_PINS; delta = 8; break; case KVM_IRQCHIP_IOAPIC: -- cgit v1.2.3-70-g09d2 From f2a743473194a1ad44a85f8b63aeef9d63e5bf47 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Wed, 18 Jul 2012 19:07:32 +0530 Subject: KVM: Add config to support ple or cpu relax optimzation Suggested-by: Avi Kivity Signed-off-by: Raghavendra K T Reviewed-by: Marcelo Tosatti Reviewed-by: Rik van Riel Tested-by: Christian Borntraeger # on s390x Signed-off-by: Avi Kivity --- arch/s390/kvm/Kconfig | 1 + arch/x86/kvm/Kconfig | 1 + virt/kvm/Kconfig | 3 +++ 3 files changed, 5 insertions(+) (limited to 'virt') diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 78eb9847008..a6e2677724e 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM depends on HAVE_KVM && EXPERIMENTAL select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a28f338843e..45c044f0fff 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -37,6 +37,7 @@ config KVM select TASK_DELAY_ACCT select PERF_EVENTS select HAVE_KVM_MSI + select HAVE_KVM_CPU_RELAX_INTERCEPT ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 28694f4a913..d01b24b72c6 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -21,3 +21,6 @@ config KVM_ASYNC_PF config HAVE_KVM_MSI bool + +config HAVE_KVM_CPU_RELAX_INTERCEPT + bool -- cgit v1.2.3-70-g09d2 From 4c088493c8d07e4e27bad53a99dcfdc14cdf45f8 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Wed, 18 Jul 2012 19:07:46 +0530 Subject: KVM: Note down when cpu relax intercepted or pause loop exited Noting pause loop exited vcpu or cpu relax intercepted helps in filtering right candidate to yield. Wrong selection of vcpu; i.e., a vcpu that just did a pl-exit or cpu relax intercepted may contribute to performance degradation. Signed-off-by: Raghavendra K T Reviewed-by: Marcelo Tosatti Reviewed-by: Rik van Riel Tested-by: Christian Borntraeger # on s390x Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 34 ++++++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 5 +++++ 2 files changed, 39 insertions(+) (limited to 'virt') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index db9aa917840..361b36fe7ec 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -183,6 +183,18 @@ struct kvm_vcpu { } async_pf; #endif +#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT + /* + * Cpu relax intercept or pause loop exit optimization + * in_spin_loop: set when a vcpu does a pause loop exit + * or cpu relax intercepted. + * dy_eligible: indicates whether vcpu is eligible for directed yield. + */ + struct { + bool in_spin_loop; + bool dy_eligible; + } spin_loop; +#endif struct kvm_vcpu_arch arch; }; @@ -898,5 +910,27 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) } } +#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT + +static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) +{ + vcpu->spin_loop.in_spin_loop = val; +} +static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) +{ + vcpu->spin_loop.dy_eligible = val; +} + +#else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ + +static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) +{ +} + +static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) +{ +} + +#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 68dda513cd7..0892b75eeed 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -239,6 +239,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) } vcpu->run = page_address(page); + kvm_vcpu_set_in_spin_loop(vcpu, false); + kvm_vcpu_set_dy_eligible(vcpu, false); + r = kvm_arch_vcpu_init(vcpu); if (r < 0) goto fail_free_run; @@ -1585,6 +1588,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) int pass; int i; + kvm_vcpu_set_in_spin_loop(me, true); /* * We boost the priority of a VCPU that is runnable but not * currently running, because it got preempted by something @@ -1610,6 +1614,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) } } } + kvm_vcpu_set_in_spin_loop(me, false); } EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); -- cgit v1.2.3-70-g09d2 From 06e48c510aa37f6e791602e6420422ea7071fe94 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Thu, 19 Jul 2012 15:17:52 +0530 Subject: KVM: Choose better candidate for directed yield Currently, on a large vcpu guests, there is a high probability of yielding to the same vcpu who had recently done a pause-loop exit or cpu relax intercepted. Such a yield can lead to the vcpu spinning again and hence degrade the performance. The patchset keeps track of the pause loop exit/cpu relax interception and gives chance to a vcpu which: (a) Has not done pause loop exit or cpu relax intercepted at all (probably he is preempted lock-holder) (b) Was skipped in last iteration because it did pause loop exit or cpu relax intercepted, and probably has become eligible now (next eligible lock holder) Signed-off-by: Raghavendra K T Reviewed-by: Marcelo Tosatti Reviewed-by: Rik van Riel Tested-by: Christian Borntraeger # on s390x Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 5 +++++ virt/kvm/kvm_main.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) (limited to 'virt') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 361b36fe7ec..74a78d09c45 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -931,6 +931,11 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { } +static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) +{ + return true; +} + #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0892b75eeed..1e10ebe1a37 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1579,6 +1579,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) } EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); +#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT +/* + * Helper that checks whether a VCPU is eligible for directed yield. + * Most eligible candidate to yield is decided by following heuristics: + * + * (a) VCPU which has not done pl-exit or cpu relax intercepted recently + * (preempted lock holder), indicated by @in_spin_loop. + * Set at the beiginning and cleared at the end of interception/PLE handler. + * + * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get + * chance last time (mostly it has become eligible now since we have probably + * yielded to lockholder in last iteration. This is done by toggling + * @dy_eligible each time a VCPU checked for eligibility.) + * + * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding + * to preempted lock-holder could result in wrong VCPU selection and CPU + * burning. Giving priority for a potential lock-holder increases lock + * progress. + * + * Since algorithm is based on heuristics, accessing another VCPU data without + * locking does not harm. It may result in trying to yield to same VCPU, fail + * and continue with next VCPU and so on. + */ +bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) +{ + bool eligible; + + eligible = !vcpu->spin_loop.in_spin_loop || + (vcpu->spin_loop.in_spin_loop && + vcpu->spin_loop.dy_eligible); + + if (vcpu->spin_loop.in_spin_loop) + kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); + + return eligible; +} +#endif void kvm_vcpu_on_spin(struct kvm_vcpu *me) { struct kvm *kvm = me->kvm; @@ -1607,6 +1644,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (waitqueue_active(&vcpu->wq)) continue; + if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) + continue; if (kvm_vcpu_yield_to(vcpu)) { kvm->last_boosted_vcpu = i; yielded = 1; @@ -1615,6 +1654,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) } } kvm_vcpu_set_in_spin_loop(me, false); + + /* Ensure vcpu is not eligible during next spinloop */ + kvm_vcpu_set_dy_eligible(me, false); } EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); -- cgit v1.2.3-70-g09d2