From f0888f70151c7f53de2b45ee20ff1905837943e8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 3 Feb 2012 00:54:17 +0000 Subject: KVM: PPC: Book3S HV: Make secondary threads more robust against stray IPIs Currently on POWER7, if we are running the guest on a core and we don't need all the hardware threads, we do nothing to ensure that the unused threads aren't executing in the kernel (other than checking that they are offline). We just assume they're napping and we don't do anything to stop them trying to enter the kernel while the guest is running. This means that a stray IPI can wake up the hardware thread and it will then try to enter the kernel, but since the core is in guest context, it will execute code from the guest in hypervisor mode once it turns the MMU on, which tends to lead to crashes or hangs in the host. This fixes the problem by adding two new one-byte flags in the kvmppc_host_state structure in the PACA which are used to interlock between the primary thread and the unused secondary threads when entering the guest. With these flags, the primary thread can ensure that the unused secondaries are not already in kernel mode (i.e. handling a stray IPI) and then indicate that they should not try to enter the kernel if they do get woken for any reason. Instead they will go into KVM code, find that there is no vcpu to run, acknowledge and clear the IPI and go back to nap mode. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 92 +++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 38 deletions(-) (limited to 'arch/powerpc/kvm/book3s_hv_rmhandlers.S') diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b70bf22a3ff..d595033bd44 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -26,6 +26,7 @@ #include #include #include +#include /***************************************************************************** * * @@ -82,6 +83,7 @@ _GLOBAL(kvmppc_hv_entry_trampoline) #define XICS_XIRR 4 #define XICS_QIRR 0xc +#define XICS_IPI 2 /* interrupt source # for IPIs */ /* * We come in here when wakened from nap mode on a secondary hw thread. @@ -94,26 +96,54 @@ kvm_start_guest: subi r1,r1,STACK_FRAME_OVERHEAD ld r2,PACATOC(r13) - /* were we napping due to cede? */ - lbz r0,HSTATE_NAPPING(r13) - cmpwi r0,0 - bne kvm_end_cede + li r0,KVM_HWTHREAD_IN_KVM + stb r0,HSTATE_HWTHREAD_STATE(r13) - /* get vcpu pointer */ - ld r4, HSTATE_KVM_VCPU(r13) + /* NV GPR values from power7_idle() will no longer be valid */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) - /* We got here with an IPI; clear it */ - ld r5, HSTATE_XICS_PHYS(r13) - li r0, 0xff - li r6, XICS_QIRR - li r7, XICS_XIRR - lwzcix r8, r5, r7 /* ack the interrupt */ + /* get vcpu pointer, NULL if we have no vcpu to run */ + ld r4,HSTATE_KVM_VCPU(r13) + cmpdi cr1,r4,0 + + /* Check the wake reason in SRR1 to see why we got here */ + mfspr r3,SPRN_SRR1 + rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ + cmpwi r3,4 /* was it an external interrupt? */ + bne 27f + + /* + * External interrupt - for now assume it is an IPI, since we + * should never get any other interrupts sent to offline threads. + * Only do this for secondary threads. + */ + beq cr1,25f + lwz r3,VCPU_PTID(r4) + cmpwi r3,0 + beq 27f +25: ld r5,HSTATE_XICS_PHYS(r13) + li r0,0xff + li r6,XICS_QIRR + li r7,XICS_XIRR + lwzcix r8,r5,r7 /* get and ack the interrupt */ sync - stbcix r0, r5, r6 /* clear it */ - stwcix r8, r5, r7 /* EOI it */ + clrldi. r9,r8,40 /* get interrupt source ID. */ + beq 27f /* none there? */ + cmpwi r9,XICS_IPI + bne 26f + stbcix r0,r5,r6 /* clear IPI */ +26: stwcix r8,r5,r7 /* EOI the interrupt */ - /* NV GPR values from power7_idle() will no longer be valid */ - stb r0, PACA_NAPSTATELOST(r13) +27: /* XXX should handle hypervisor maintenance interrupts etc. here */ + + /* if we have no vcpu to run, go back to sleep */ + beq cr1,kvm_no_guest + + /* were we napping due to cede? */ + lbz r0,HSTATE_NAPPING(r13) + cmpwi r0,0 + bne kvm_end_cede .global kvmppc_hv_entry kvmppc_hv_entry: @@ -1445,8 +1475,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) * Take a nap until a decrementer or external interrupt occurs, * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR */ - li r0,0x80 - stb r0,PACAPROCSTART(r13) + li r0,1 + stb r0,HSTATE_HWTHREAD_REQ(r13) mfspr r5,SPRN_LPCR ori r5,r5,LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR,r5 @@ -1463,26 +1493,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) kvm_end_cede: /* Woken by external or decrementer interrupt */ ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATOC(r13) - /* If we're a secondary thread and we got here by an IPI, ack it */ - ld r4,HSTATE_KVM_VCPU(r13) - lwz r3,VCPU_PTID(r4) - cmpwi r3,0 - beq 27f - mfspr r3,SPRN_SRR1 - rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ - cmpwi r3,4 /* was it an external interrupt? */ - bne 27f - ld r5, HSTATE_XICS_PHYS(r13) - li r0,0xff - li r6,XICS_QIRR - li r7,XICS_XIRR - lwzcix r8,r5,r7 /* ack the interrupt */ - sync - stbcix r0,r5,r6 /* clear it */ - stwcix r8,r5,r7 /* EOI it */ -27: /* load up FP state */ bl kvmppc_load_fp @@ -1580,12 +1591,17 @@ secondary_nap: stwcx. r3, 0, r4 bne 51b +kvm_no_guest: + li r0, KVM_HWTHREAD_IN_NAP + stb r0, HSTATE_HWTHREAD_STATE(r13) + li r0, 0 + std r0, HSTATE_KVM_VCPU(r13) + li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 isync - li r0, 0 std r0, HSTATE_SCRATCH0(r13) ptesync ld r0, HSTATE_SCRATCH0(r13) -- cgit v1.2.3-70-g09d2 From 8943633cf9b87980d261a022e90d94bc2c55df35 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 2 Mar 2012 01:38:23 +0000 Subject: KVM: PPC: Work around POWER7 DABR corruption problem It turns out that on POWER7, writing to the DABR can cause a corrupted value to be written if the PMU is active and updating SDAR in continuous sampling mode. To work around this, we make sure that the PMU is inactive and SDAR updates are disabled (via MMCRA) when we are context-switching DABR. When the guest sets DABR via the H_SET_DABR hypercall, we use a slightly different workaround, which is to read back the DABR and write it again if it got corrupted. While we are at it, make it consistent that the saving and restoring of the guest's non-volatile GPRs and the FPRs are done with the guest setup of the PMU active. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/book3s_hv_interrupts.S | 9 +++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 93 +++++++++++++++++++-------------- 2 files changed, 61 insertions(+), 41 deletions(-) (limited to 'arch/powerpc/kvm/book3s_hv_rmhandlers.S') diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index d3fb4df02c4..84035a528c8 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -68,19 +68,24 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) rotldi r10,r10,16 mtmsrd r10,1 - /* Save host PMU registers and load guest PMU registers */ + /* Save host PMU registers */ /* R4 is live here (vcpu pointer) but not r3 or r5 */ li r3, 1 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ mfspr r7, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ + mfspr r6, SPRN_MMCRA +BEGIN_FTR_SECTION + /* On P7, clear MMCRA in order to disable SDAR updates */ + li r5, 0 + mtspr SPRN_MMCRA, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ lbz r5, LPPACA_PMCINUSE(r3) cmpwi r5, 0 beq 31f /* skip if not */ mfspr r5, SPRN_MMCR1 - mfspr r6, SPRN_MMCRA std r7, HSTATE_MMCR(r13) std r5, HSTATE_MMCR + 8(r13) std r6, HSTATE_MMCR + 16(r13) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index d595033bd44..a84aafce2a1 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -159,24 +159,15 @@ kvmppc_hv_entry: mflr r0 std r0, HSTATE_VMHANDLER(r13) - ld r14, VCPU_GPR(r14)(r4) - ld r15, VCPU_GPR(r15)(r4) - ld r16, VCPU_GPR(r16)(r4) - ld r17, VCPU_GPR(r17)(r4) - ld r18, VCPU_GPR(r18)(r4) - ld r19, VCPU_GPR(r19)(r4) - ld r20, VCPU_GPR(r20)(r4) - ld r21, VCPU_GPR(r21)(r4) - ld r22, VCPU_GPR(r22)(r4) - ld r23, VCPU_GPR(r23)(r4) - ld r24, VCPU_GPR(r24)(r4) - ld r25, VCPU_GPR(r25)(r4) - ld r26, VCPU_GPR(r26)(r4) - ld r27, VCPU_GPR(r27)(r4) - ld r28, VCPU_GPR(r28)(r4) - ld r29, VCPU_GPR(r29)(r4) - ld r30, VCPU_GPR(r30)(r4) - ld r31, VCPU_GPR(r31)(r4) + /* Set partition DABR */ + /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ + li r5,3 + ld r6,VCPU_DABR(r4) + mtspr SPRN_DABRX,r5 + mtspr SPRN_DABR,r6 +BEGIN_FTR_SECTION + isync +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Load guest PMU registers */ /* R4 is live here (vcpu pointer) */ @@ -215,6 +206,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* Load up FP, VMX and VSX registers */ bl kvmppc_load_fp + ld r14, VCPU_GPR(r14)(r4) + ld r15, VCPU_GPR(r15)(r4) + ld r16, VCPU_GPR(r16)(r4) + ld r17, VCPU_GPR(r17)(r4) + ld r18, VCPU_GPR(r18)(r4) + ld r19, VCPU_GPR(r19)(r4) + ld r20, VCPU_GPR(r20)(r4) + ld r21, VCPU_GPR(r21)(r4) + ld r22, VCPU_GPR(r22)(r4) + ld r23, VCPU_GPR(r23)(r4) + ld r24, VCPU_GPR(r24)(r4) + ld r25, VCPU_GPR(r25)(r4) + ld r26, VCPU_GPR(r26)(r4) + ld r27, VCPU_GPR(r27)(r4) + ld r28, VCPU_GPR(r28)(r4) + ld r29, VCPU_GPR(r29)(r4) + ld r30, VCPU_GPR(r30)(r4) + ld r31, VCPU_GPR(r31)(r4) + BEGIN_FTR_SECTION /* Switch DSCR to guest value */ ld r5, VCPU_DSCR(r4) @@ -256,12 +266,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) mtspr SPRN_DAR, r5 mtspr SPRN_DSISR, r6 - /* Set partition DABR */ - li r5,3 - ld r6,VCPU_DABR(r4) - mtspr SPRN_DABRX,r5 - mtspr SPRN_DABR,r6 - BEGIN_FTR_SECTION /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) @@ -955,12 +959,6 @@ BEGIN_FTR_SECTION mtspr SPRN_AMR,r6 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - /* Restore host DABR and DABRX */ - ld r5,HSTATE_DABR(r13) - li r6,7 - mtspr SPRN_DABR,r5 - mtspr SPRN_DABRX,r6 - /* Switch DSCR back to host value */ BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR @@ -999,6 +997,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r5, VCPU_SPRG2(r9) std r6, VCPU_SPRG3(r9) + /* save FP state */ + mr r3, r9 + bl .kvmppc_save_fp + /* Increment yield count if they have a VPA */ ld r8, VCPU_VPA(r9) /* do they have a VPA? */ cmpdi r8, 0 @@ -1013,6 +1015,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ mfspr r4, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ + mfspr r6, SPRN_MMCRA +BEGIN_FTR_SECTION + /* On P7, clear MMCRA in order to disable SDAR updates */ + li r7, 0 + mtspr SPRN_MMCRA, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync beq 21f /* if no VPA, save PMU stuff anyway */ lbz r7, LPPACA_PMCINUSE(r8) @@ -1021,7 +1029,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ b 22f 21: mfspr r5, SPRN_MMCR1 - mfspr r6, SPRN_MMCRA std r4, VCPU_MMCR(r9) std r5, VCPU_MMCR + 8(r9) std r6, VCPU_MMCR + 16(r9) @@ -1046,17 +1053,20 @@ BEGIN_FTR_SECTION stw r11, VCPU_PMC + 28(r9) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 22: - /* save FP state */ - mr r3, r9 - bl .kvmppc_save_fp /* Secondary threads go off to take a nap on POWER7 */ BEGIN_FTR_SECTION - lwz r0,VCPU_PTID(r3) + lwz r0,VCPU_PTID(r9) cmpwi r0,0 bne secondary_nap END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + /* Restore host DABR and DABRX */ + ld r5,HSTATE_DABR(r13) + li r6,7 + mtspr SPRN_DABR,r5 + mtspr SPRN_DABRX,r6 + /* * Reload DEC. HDEC interrupts were disabled when * we reloaded the host's LPCR value. @@ -1393,7 +1403,12 @@ bounce_ext_interrupt: _GLOBAL(kvmppc_h_set_dabr) std r4,VCPU_DABR(r3) - mtspr SPRN_DABR,r4 + /* Work around P7 bug where DABR can get corrupted on mtspr */ +1: mtspr SPRN_DABR,r4 + mfspr r5, SPRN_DABR + cmpd r4, r5 + bne 1b + isync li r3,0 blr @@ -1615,8 +1630,8 @@ kvm_no_guest: * r3 = vcpu pointer */ _GLOBAL(kvmppc_save_fp) - mfmsr r9 - ori r8,r9,MSR_FP + mfmsr r5 + ori r8,r5,MSR_FP #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION oris r8,r8,MSR_VEC@h @@ -1665,7 +1680,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif mfspr r6,SPRN_VRSAVE stw r6,VCPU_VRSAVE(r3) - mtmsrd r9 + mtmsrd r5 isync blr -- cgit v1.2.3-70-g09d2