diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-14 17:43:43 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-14 17:43:43 -0700 |
commit | 69def9f05dfce3281bb06599057e6b8097385d39 (patch) | |
tree | 7d826b22924268ddbfad101993b248996d40e2ec /arch/x86/kvm/paging_tmpl.h | |
parent | 353f6dd2dec992ddd34620a94b051b0f76227379 (diff) | |
parent | 8e616fc8d343bd7f0f0a0c22407fdcb77f6d22b1 (diff) |
Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (202 commits)
MAINTAINERS: update KVM entry
KVM: correct error-handling code
KVM: fix compile warnings on s390
KVM: VMX: Check cpl before emulating debug register access
KVM: fix misreporting of coalesced interrupts by kvm tracer
KVM: x86: drop duplicate kvm_flush_remote_tlb calls
KVM: VMX: call vmx_load_host_state() only if msr is cached
KVM: VMX: Conditionally reload debug register 6
KVM: Use thread debug register storage instead of kvm specific data
KVM guest: do not batch pte updates from interrupt context
KVM: Fix coalesced interrupt reporting in IOAPIC
KVM guest: fix bogus wallclock physical address calculation
KVM: VMX: Fix cr8 exiting control clobbering by EPT
KVM: Optimize kvm_mmu_unprotect_page_virt() for tdp
KVM: Document KVM_CAP_IRQCHIP
KVM: Protect update_cr8_intercept() when running without an apic
KVM: VMX: Fix EPT with WP bit change during paging
KVM: Use kvm_{read,write}_guest_virt() to read and write segment descriptors
KVM: x86 emulator: Add adc and sbb missing decoder flags
KVM: Add missing #include
...
Diffstat (limited to 'arch/x86/kvm/paging_tmpl.h')
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 141 |
1 files changed, 74 insertions, 67 deletions
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 67785f63539..d2fec9c12d2 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -27,7 +27,8 @@ #define guest_walker guest_walker64 #define FNAME(name) paging##64_##name #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK - #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK + #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) + #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) #define PT_INDEX(addr, level) PT64_INDEX(addr, level) #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) #define PT_LEVEL_BITS PT64_LEVEL_BITS @@ -43,7 +44,8 @@ #define guest_walker guest_walker32 #define FNAME(name) paging##32_##name #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK - #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK + #define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl) + #define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl) #define PT_INDEX(addr, level) PT32_INDEX(addr, level) #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) #define PT_LEVEL_BITS PT32_LEVEL_BITS @@ -53,8 +55,8 @@ #error Invalid PTTYPE value #endif -#define gpte_to_gfn FNAME(gpte_to_gfn) -#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde) +#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl) +#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL) /* * The guest_walker structure emulates the behavior of the hardware page @@ -71,14 +73,9 @@ struct guest_walker { u32 error_code; }; -static gfn_t gpte_to_gfn(pt_element_t gpte) +static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) { - return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; -} - -static gfn_t gpte_to_gfn_pde(pt_element_t gpte) -{ - return (gpte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; + return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; } static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, @@ -125,14 +122,16 @@ static int FNAME(walk_addr)(struct guest_walker *walker, gpa_t pte_gpa; int rsvd_fault = 0; - pgprintk("%s: addr %lx\n", __func__, addr); + trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, + fetch_fault); walk: walker->level = vcpu->arch.mmu.root_level; pte = vcpu->arch.cr3; #if PTTYPE == 64 if (!is_long_mode(vcpu)) { - pte = vcpu->arch.pdptrs[(addr >> 30) & 3]; - if (!is_present_pte(pte)) + pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); + trace_kvm_mmu_paging_element(pte, walker->level); + if (!is_present_gpte(pte)) goto not_present; --walker->level; } @@ -150,12 +149,11 @@ walk: pte_gpa += index * sizeof(pt_element_t); walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - pgprintk("%s: table_gfn[%d] %lx\n", __func__, - walker->level - 1, table_gfn); kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); + trace_kvm_mmu_paging_element(pte, walker->level); - if (!is_present_pte(pte)) + if (!is_present_gpte(pte)) goto not_present; rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level); @@ -175,6 +173,8 @@ walk: #endif if (!(pte & PT_ACCESSED_MASK)) { + trace_kvm_mmu_set_accessed_bit(table_gfn, index, + sizeof(pte)); mark_page_dirty(vcpu->kvm, table_gfn); if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, pte|PT_ACCESSED_MASK)) @@ -186,18 +186,24 @@ walk: walker->ptes[walker->level - 1] = pte; - if (walker->level == PT_PAGE_TABLE_LEVEL) { - walker->gfn = gpte_to_gfn(pte); - break; - } - - if (walker->level == PT_DIRECTORY_LEVEL - && (pte & PT_PAGE_SIZE_MASK) - && (PTTYPE == 64 || is_pse(vcpu))) { - walker->gfn = gpte_to_gfn_pde(pte); - walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL); - if (PTTYPE == 32 && is_cpuid_PSE36()) + if ((walker->level == PT_PAGE_TABLE_LEVEL) || + ((walker->level == PT_DIRECTORY_LEVEL) && + (pte & PT_PAGE_SIZE_MASK) && + (PTTYPE == 64 || is_pse(vcpu))) || + ((walker->level == PT_PDPE_LEVEL) && + (pte & PT_PAGE_SIZE_MASK) && + is_long_mode(vcpu))) { + int lvl = walker->level; + + walker->gfn = gpte_to_gfn_lvl(pte, lvl); + walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) + >> PAGE_SHIFT; + + if (PTTYPE == 32 && + walker->level == PT_DIRECTORY_LEVEL && + is_cpuid_PSE36()) walker->gfn += pse36_gfn_delta(pte); + break; } @@ -205,9 +211,10 @@ walk: --walker->level; } - if (write_fault && !is_dirty_pte(pte)) { + if (write_fault && !is_dirty_gpte(pte)) { bool ret; + trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); mark_page_dirty(vcpu->kvm, table_gfn); ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, pte|PT_DIRTY_MASK); @@ -239,6 +246,7 @@ err: walker->error_code |= PFERR_FETCH_MASK; if (rsvd_fault) walker->error_code |= PFERR_RSVD_MASK; + trace_kvm_mmu_walker_error(walker->error_code); return 0; } @@ -248,12 +256,11 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, pt_element_t gpte; unsigned pte_access; pfn_t pfn; - int largepage = vcpu->arch.update_pte.largepage; gpte = *(const pt_element_t *)pte; if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { - if (!is_present_pte(gpte)) - set_shadow_pte(spte, shadow_notrap_nonpresent_pte); + if (!is_present_gpte(gpte)) + __set_spte(spte, shadow_notrap_nonpresent_pte); return; } pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); @@ -267,7 +274,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, return; kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, - gpte & PT_DIRTY_MASK, NULL, largepage, + gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, gpte_to_gfn(gpte), pfn, true); } @@ -276,7 +283,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, */ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *gw, - int user_fault, int write_fault, int largepage, + int user_fault, int write_fault, int hlevel, int *ptwrite, pfn_t pfn) { unsigned access = gw->pt_access; @@ -289,19 +296,18 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, pt_element_t curr_pte; struct kvm_shadow_walk_iterator iterator; - if (!is_present_pte(gw->ptes[gw->level - 1])) + if (!is_present_gpte(gw->ptes[gw->level - 1])) return NULL; for_each_shadow_entry(vcpu, addr, iterator) { level = iterator.level; sptep = iterator.sptep; - if (level == PT_PAGE_TABLE_LEVEL - || (largepage && level == PT_DIRECTORY_LEVEL)) { + if (iterator.level == hlevel) { mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, user_fault, write_fault, gw->ptes[gw->level-1] & PT_DIRTY_MASK, - ptwrite, largepage, + ptwrite, level, gw->gfn, pfn, false); break; } @@ -311,16 +317,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (is_large_pte(*sptep)) { rmap_remove(vcpu->kvm, sptep); - set_shadow_pte(sptep, shadow_trap_nonpresent_pte); + __set_spte(sptep, shadow_trap_nonpresent_pte); kvm_flush_remote_tlbs(vcpu->kvm); } - if (level == PT_DIRECTORY_LEVEL - && gw->level == PT_DIRECTORY_LEVEL) { + if (level <= gw->level) { + int delta = level - gw->level + 1; direct = 1; - if (!is_dirty_pte(gw->ptes[level - 1])) + if (!is_dirty_gpte(gw->ptes[level - delta])) access &= ~ACC_WRITE_MASK; - table_gfn = gpte_to_gfn(gw->ptes[level - 1]); + table_gfn = gpte_to_gfn(gw->ptes[level - delta]); + /* advance table_gfn when emulating 1gb pages with 4k */ + if (delta == 0) + table_gfn += PT_INDEX(addr, level); } else { direct = 0; table_gfn = gw->table_gfn[level - 2]; @@ -369,11 +378,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, int user_fault = error_code & PFERR_USER_MASK; int fetch_fault = error_code & PFERR_FETCH_MASK; struct guest_walker walker; - u64 *shadow_pte; + u64 *sptep; int write_pt = 0; int r; pfn_t pfn; - int largepage = 0; + int level = PT_PAGE_TABLE_LEVEL; unsigned long mmu_seq; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); @@ -399,14 +408,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, return 0; } - if (walker.level == PT_DIRECTORY_LEVEL) { - gfn_t large_gfn; - large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1); - if (is_largepage_backed(vcpu, large_gfn)) { - walker.gfn = large_gfn; - largepage = 1; - } + if (walker.level >= PT_DIRECTORY_LEVEL) { + level = min(walker.level, mapping_level(vcpu, walker.gfn)); + walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); } + mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); @@ -422,11 +428,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, if (mmu_notifier_retry(vcpu, mmu_seq)) goto out_unlock; kvm_mmu_free_some_pages(vcpu); - shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, - largepage, &write_pt, pfn); - + sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, + level, &write_pt, pfn); pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, - shadow_pte, *shadow_pte, write_pt); + sptep, *sptep, write_pt); if (!write_pt) vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ @@ -459,8 +464,9 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) sptep = iterator.sptep; /* FIXME: properly handle invlpg on large guest pages */ - if (level == PT_PAGE_TABLE_LEVEL || - ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { + if (level == PT_PAGE_TABLE_LEVEL || + ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || + ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { struct kvm_mmu_page *sp = page_header(__pa(sptep)); pte_gpa = (sp->gfn << PAGE_SHIFT); @@ -472,7 +478,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) --vcpu->kvm->stat.lpages; need_flush = 1; } - set_shadow_pte(sptep, shadow_trap_nonpresent_pte); + __set_spte(sptep, shadow_trap_nonpresent_pte); break; } @@ -489,7 +495,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, sizeof(pt_element_t))) return; - if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { + if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) { if (mmu_topup_memory_caches(vcpu)) return; kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte, @@ -536,7 +542,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt); pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t); for (j = 0; j < ARRAY_SIZE(pt); ++j) - if (r || is_present_pte(pt[j])) + if (r || is_present_gpte(pt[j])) sp->spt[i+j] = shadow_trap_nonpresent_pte; else sp->spt[i+j] = shadow_notrap_nonpresent_pte; @@ -574,23 +580,23 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) sizeof(pt_element_t))) return -EINVAL; - if (gpte_to_gfn(gpte) != gfn || !is_present_pte(gpte) || + if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) { u64 nonpresent; rmap_remove(vcpu->kvm, &sp->spt[i]); - if (is_present_pte(gpte)) + if (is_present_gpte(gpte)) nonpresent = shadow_trap_nonpresent_pte; else nonpresent = shadow_notrap_nonpresent_pte; - set_shadow_pte(&sp->spt[i], nonpresent); + __set_spte(&sp->spt[i], nonpresent); continue; } nr_present++; pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, - is_dirty_pte(gpte), 0, gfn, + is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, spte_to_pfn(sp->spt[i]), true, false); } @@ -603,9 +609,10 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) #undef PT_BASE_ADDR_MASK #undef PT_INDEX #undef PT_LEVEL_MASK -#undef PT_DIR_BASE_ADDR_MASK +#undef PT_LVL_ADDR_MASK +#undef PT_LVL_OFFSET_MASK #undef PT_LEVEL_BITS #undef PT_MAX_FULL_LEVELS #undef gpte_to_gfn -#undef gpte_to_gfn_pde +#undef gpte_to_gfn_lvl #undef CMPXCHG |