From a25f7e1f8c1ff68213a63dada9d5e32dc1a0f587 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 30 Apr 2007 17:05:38 +0300 Subject: KVM: Reduce misfirings of the fork detector The kvm mmu tries to detect forks by looking for repeated writes to a page table. If it sees a fork, it unshadows the page table so the page table copying can proceed at native speed instead of being emulated. However, the detector also triggered on simple demand paging access patterns: a linear walk of memory would of course cause repeated writes to the same pagetable page, causing it to unshadow prematurely. Fix by resetting the fork detector if we detect a demand fault. Signed-off-by: Avi Kivity --- drivers/kvm/paging_tmpl.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 73ffbffb109..bc64cceec03 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -421,6 +421,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, pgprintk("%s: guest page fault\n", __FUNCTION__); inject_page_fault(vcpu, addr, walker.error_code); FNAME(release_walker)(&walker); + vcpu->last_pt_write_count = 0; /* reset fork detector */ return 0; } @@ -442,6 +443,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, FNAME(release_walker)(&walker); + if (!write_pt) + vcpu->last_pt_write_count = 0; /* reset fork detector */ + /* * mmio: emulate if accessible, otherwise its a guest fault. */ -- cgit v1.2.3-70-g09d2 From 0028425f647b6b78a0de8810d6b782fc3ce6c272 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 1 May 2007 16:53:31 +0300 Subject: KVM: Update shadow pte on write to guest pte A typical demand page/copy on write pattern is: - page fault on vaddr - kvm propagates fault to guest - guest handles fault, updates pte - kvm traps write, clears shadow pte, resumes guest - guest returns to userspace, re-faults on same vaddr - kvm installs shadow pte, resumes guest - guest continues So, three vmexits for a single guest page fault. But if instead of clearing the page table entry, we update it to correspond to the value that the guest has just written, we eliminate the third vmexit. This patch does exactly that, reducing kbuild time by about 10%.
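For context: the fork detector that the first patch resets and the write trap that this patch extends both live in kvm_mmu_pte_write() in mmu.c. The counting side is not part of this series; it looked roughly like this at the time (a paraphrase from memory of the surrounding mmu.c — take names like last_pt_write_gfn and flooded as assumptions, not quotations):

        /* In kvm_mmu_pte_write(): repeated emulated writes to the same
         * page table page look like the guest copying page tables. */
        if (gfn == vcpu->last_pt_write_gfn) {
                ++vcpu->last_pt_write_count;
                if (vcpu->last_pt_write_count >= 3)
                        flooded = 1;    /* unshadow, copy at native speed */
        } else {
                vcpu->last_pt_write_gfn = gfn;
                vcpu->last_pt_write_count = 1;
        }

The first patch above resets last_pt_write_count on demand faults, so a linear walk over fresh memory never reaches the threshold.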
Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 15 +++++++++++++++ drivers/kvm/paging_tmpl.h | 15 +++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 23dc4612026..9ec3df90dbb 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1137,6 +1137,20 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, *spte = 0; } +static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *page, + u64 *spte, + const void *new, int bytes) +{ + if (page->role.level != PT_PAGE_TABLE_LEVEL) + return; + + if (page->role.glevels == PT32_ROOT_LEVEL) + paging32_update_pte(vcpu, page, spte, new, bytes); + else + paging64_update_pte(vcpu, page, spte, new, bytes); +} + void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *old, const u8 *new, int bytes) { @@ -1212,6 +1226,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, spte += page_offset / sizeof(*spte); while (npte--) { mmu_pte_write_zap_pte(vcpu, page, spte); + mmu_pte_write_new_pte(vcpu, page, spte, new, bytes); ++spte; } } diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index bc64cceec03..10ba0a80ce5 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -202,6 +202,21 @@ static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte, guest_pte & PT_DIRTY_MASK, access_bits, gfn); } +static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, + u64 *spte, const void *pte, int bytes) +{ + pt_element_t gpte; + + if (bytes < sizeof(pt_element_t)) + return; + gpte = *(const pt_element_t *)pte; + if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) + return; + pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); + FNAME(set_pte)(vcpu, gpte, spte, 6, + (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); +} + static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde, u64 *shadow_pte, u64 access_bits, gfn_t gfn) { -- cgit v1.2.3-70-g09d2 From 47ad8e689b4f94f9fc3b2588a7aaa65e4eca667c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 6 May 2007 15:50:58 +0300 Subject: KVM: MMU: Store shadow page tables as kernel virtual addresses, not physical Simplifies things a bit. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 +- drivers/kvm/mmu.c | 32 +++++++++++++++----------------- drivers/kvm/paging_tmpl.h | 2 +- 3 files changed, 17 insertions(+), 19 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 5e6dac5a3c0..fc4a6c1235f 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -139,7 +139,7 @@ struct kvm_mmu_page { gfn_t gfn; union kvm_mmu_page_role role; - hpa_t page_hpa; + u64 *spt; unsigned long slot_bitmap; /* One bit set per slot which has memory * in this shadow page.
*/ diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index a96c9ae54f3..c85c6649280 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -439,13 +439,12 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) } #ifdef MMU_DEBUG -static int is_empty_shadow_page(hpa_t page_hpa) +static int is_empty_shadow_page(u64 *spt) { u64 *pos; u64 *end; - for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64); - pos != end; pos++) + for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) if (*pos != 0) { printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__, pos, *pos); @@ -458,7 +457,7 @@ static int is_empty_shadow_page(hpa_t page_hpa) static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page_head) { - ASSERT(is_empty_shadow_page(page_head->page_hpa)); + ASSERT(is_empty_shadow_page(page_head->spt)); list_move(&page_head->link, &vcpu->free_pages); ++vcpu->kvm->n_free_mmu_pages; } @@ -478,7 +477,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); list_move(&page->link, &vcpu->kvm->active_mmu_pages); - ASSERT(is_empty_shadow_page(page->page_hpa)); + ASSERT(is_empty_shadow_page(page->spt)); page->slot_bitmap = 0; page->multimapped = 0; page->parent_pte = parent_pte; @@ -636,7 +635,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, u64 *pt; u64 ent; - pt = __va(page->page_hpa); + pt = page->spt; if (page->role.level == PT_PAGE_TABLE_LEVEL) { for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { @@ -803,7 +802,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) return -ENOMEM; } - table[index] = new_table->page_hpa | PT_PRESENT_MASK + table[index] = __pa(new_table->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; } table_addr = table[index] & PT64_BASE_ADDR_MASK; @@ -855,7 +854,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); page = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, 0, 0, NULL); - root = page->page_hpa; + root = __pa(page->spt); ++page->root_count; vcpu->mmu.root_hpa = root; return; @@ -876,7 +875,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, !is_paging(vcpu), 0, NULL); - root = page->page_hpa; + root = __pa(page->spt); ++page->root_count; vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; } @@ -1220,8 +1219,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (quadrant != page->role.quadrant) continue; } - spte = __va(page->page_hpa); - spte += page_offset / sizeof(*spte); + spte = &page->spt[page_offset / sizeof(*spte)]; while (npte--) { mmu_pte_write_zap_pte(vcpu, page, spte); mmu_pte_write_new_pte(vcpu, page, spte, new, bytes); @@ -1262,8 +1260,8 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu) page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); list_del(&page->link); - __free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT)); - page->page_hpa = INVALID_PAGE; + free_page((unsigned long)page->spt); + page->spt = NULL; } free_page((unsigned long)vcpu->mmu.pae_root); } @@ -1282,8 +1280,8 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) if ((page = alloc_page(GFP_KERNEL)) == NULL) goto error_1; set_page_private(page, (unsigned long)page_header); - page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT; - memset(__va(page_header->page_hpa), 0, PAGE_SIZE); + page_header->spt = page_address(page); + memset(page_header->spt, 0, PAGE_SIZE); list_add(&page_header->link, 
&vcpu->free_pages); ++vcpu->kvm->n_free_mmu_pages; } @@ -1346,7 +1344,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot) if (!test_bit(slot, &page->slot_bitmap)) continue; - pt = __va(page->page_hpa); + pt = page->spt; for (i = 0; i < PT64_ENT_PER_PAGE; ++i) /* avoid RMW */ if (pt[i] & PT_WRITABLE_MASK) { @@ -1497,7 +1495,7 @@ static int count_writable_mappings(struct kvm_vcpu *vcpu) int i; list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) { - u64 *pt = __va(page->page_hpa); + u64 *pt = page->spt; if (page->role.level != PT_PAGE_TABLE_LEVEL) continue; diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 10ba0a80ce5..6dd0da9a5d1 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -304,7 +304,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, metaphysical, hugepage_access, shadow_ent); - shadow_addr = shadow_page->page_hpa; + shadow_addr = __pa(shadow_page->spt); shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; *shadow_ent = shadow_pte; -- cgit v1.2.3-70-g09d2 From 8d7282036f82244c5a1146a1a7edf03c50d278d9 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Tue, 29 May 2007 15:07:21 +0300 Subject: KVM: Use symbolic constants instead of magic numbers Signed-off-by: Avi Kivity --- drivers/kvm/paging_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 6dd0da9a5d1..183d4ca9b31 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -213,7 +213,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) return; pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); - FNAME(set_pte)(vcpu, gpte, spte, 6, + FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); } -- cgit v1.2.3-70-g09d2 From ef0197e8d9273ad8fbfb1bbd30e46e42a32c79e8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 30 May 2007 14:21:51 +0300 Subject: KVM: MMU: Simplify fetch() a little bit Signed-off-by: Avi Kivity --- drivers/kvm/paging_tmpl.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 183d4ca9b31..e094a8ba17a 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -241,6 +241,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, { hpa_t shadow_addr; int level; + u64 *shadow_ent; u64 *prev_shadow_ent = NULL; pt_element_t *guest_ent = walker->ptep; @@ -257,13 +258,13 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, for (; ; level--) { u32 index = SHADOW_PT_INDEX(addr, level); - u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index; struct kvm_mmu_page *shadow_page; u64 shadow_pte; int metaphysical; gfn_t table_gfn; unsigned hugepage_access = 0; + shadow_ent = ((u64 *)__va(shadow_addr)) + index; if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { if (level == PT_PAGE_TABLE_LEVEL) return shadow_ent; @@ -272,22 +273,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, continue; } - if (level == PT_PAGE_TABLE_LEVEL) { - - if (walker->level == PT_DIRECTORY_LEVEL) { - if (prev_shadow_ent) - *prev_shadow_ent |= PT_SHADOW_PS_MARK; - 
FNAME(set_pde)(vcpu, *guest_ent, shadow_ent, - walker->inherited_ar, - walker->gfn); - } else { - ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); - FNAME(set_pte)(vcpu, *guest_ent, shadow_ent, - walker->inherited_ar, - walker->gfn); - } - return shadow_ent; - } + if (level == PT_PAGE_TABLE_LEVEL) + break; if (level - 1 == PT_PAGE_TABLE_LEVEL && walker->level == PT_DIRECTORY_LEVEL) { @@ -310,6 +297,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, *shadow_ent = shadow_pte; prev_shadow_ent = shadow_ent; } + + if (walker->level == PT_DIRECTORY_LEVEL) { + if (prev_shadow_ent) + *prev_shadow_ent |= PT_SHADOW_PS_MARK; + FNAME(set_pde)(vcpu, *guest_ent, shadow_ent, + walker->inherited_ar, walker->gfn); + } else { + ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); + FNAME(set_pte)(vcpu, *guest_ent, shadow_ent, + walker->inherited_ar, + walker->gfn); + } + return shadow_ent; } /* -- cgit v1.2.3-70-g09d2 From e60d75ea292071e7ab33c10ca73fdd33fcbbe501 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 30 May 2007 19:31:17 +0300 Subject: KVM: MMU: Move set_pte_common() to pte width dependent code In preparation of some modifications. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 48 ---------------------------------------- drivers/kvm/paging_tmpl.h | 56 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 52 insertions(+), 52 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 46491b4cd85..a7631502f22 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -965,54 +965,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu) kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); } -static inline void set_pte_common(struct kvm_vcpu *vcpu, - u64 *shadow_pte, - gpa_t gaddr, - int dirty, - u64 access_bits, - gfn_t gfn) -{ - hpa_t paddr; - - *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; - if (!dirty) - access_bits &= ~PT_WRITABLE_MASK; - - paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); - - *shadow_pte |= access_bits; - - if (is_error_hpa(paddr)) { - *shadow_pte |= gaddr; - *shadow_pte |= PT_SHADOW_IO_MARK; - *shadow_pte &= ~PT_PRESENT_MASK; - return; - } - - *shadow_pte |= paddr; - - if (access_bits & PT_WRITABLE_MASK) { - struct kvm_mmu_page *shadow; - - shadow = kvm_mmu_lookup_page(vcpu, gfn); - if (shadow) { - pgprintk("%s: found shadow page for %lx, marking ro\n", - __FUNCTION__, gfn); - access_bits &= ~PT_WRITABLE_MASK; - if (is_writeble_pte(*shadow_pte)) { - *shadow_pte &= ~PT_WRITABLE_MASK; - kvm_arch_ops->tlb_flush(vcpu); - } - } - } - - if (access_bits & PT_WRITABLE_MASK) - mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); - - page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); - rmap_add(vcpu, shadow_pte); -} - static void inject_page_fault(struct kvm_vcpu *vcpu, u64 addr, u32 err_code) diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index e094a8ba17a..65763007f04 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -192,14 +192,62 @@ static void FNAME(mark_pagetable_dirty)(struct kvm *kvm, mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]); } +static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, + u64 *shadow_pte, + gpa_t gaddr, + int dirty, + u64 access_bits, + gfn_t gfn) +{ + hpa_t paddr; + + *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; + if (!dirty) + access_bits &= ~PT_WRITABLE_MASK; + + paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); + + *shadow_pte |= access_bits; + + if (is_error_hpa(paddr)) { + *shadow_pte |= gaddr; + 
*shadow_pte |= PT_SHADOW_IO_MARK; + *shadow_pte &= ~PT_PRESENT_MASK; + return; + } + + *shadow_pte |= paddr; + + if (access_bits & PT_WRITABLE_MASK) { + struct kvm_mmu_page *shadow; + + shadow = kvm_mmu_lookup_page(vcpu, gfn); + if (shadow) { + pgprintk("%s: found shadow page for %lx, marking ro\n", + __FUNCTION__, gfn); + access_bits &= ~PT_WRITABLE_MASK; + if (is_writeble_pte(*shadow_pte)) { + *shadow_pte &= ~PT_WRITABLE_MASK; + kvm_arch_ops->tlb_flush(vcpu); + } + } + } + + if (access_bits & PT_WRITABLE_MASK) + mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); + + page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); + rmap_add(vcpu, shadow_pte); +} + static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte, u64 *shadow_pte, u64 access_bits, gfn_t gfn) { ASSERT(*shadow_pte == 0); access_bits &= guest_pte; *shadow_pte = (guest_pte & PT_PTE_COPY_MASK); - set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK, - guest_pte & PT_DIRTY_MASK, access_bits, gfn); + FNAME(set_pte_common)(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK, + guest_pte & PT_DIRTY_MASK, access_bits, gfn); } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, @@ -229,8 +277,8 @@ static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde, gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) << (32 - PT32_DIR_PSE36_SHIFT); *shadow_pte = guest_pde & PT_PTE_COPY_MASK; - set_pte_common(vcpu, shadow_pte, gaddr, - guest_pde & PT_DIRTY_MASK, access_bits, gfn); + FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, + guest_pde & PT_DIRTY_MASK, access_bits, gfn); } /* -- cgit v1.2.3-70-g09d2 From 6598c8b2420c30b48fc0d1d40d9ef6a1f7312107 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 11:45:18 +0300 Subject: KVM: MMU: Pass the guest pde to set_pte_common We will need the accessed bit (in addition to the dirty bit) and also write access (for setting the dirty bit) in a future patch. 
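Passing a pointer instead of the pte value is what allows set_pte_common() to write back into the guest pte. A minimal sketch of the update this enables — essentially the code that arrives two patches later, when fix_write_pf() is folded into set_pte_common():

        /* with pt_element_t *gpte the guest pte can be updated in place */
        if (write_fault && !(*gpte & PT_DIRTY_MASK)) {
                *gpte |= PT_DIRTY_MASK;
                FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
        }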
Signed-off-by: Avi Kivity --- drivers/kvm/paging_tmpl.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 65763007f04..7e998d19384 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -195,11 +195,12 @@ static void FNAME(mark_pagetable_dirty)(struct kvm *kvm, static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, u64 *shadow_pte, gpa_t gaddr, - int dirty, + pt_element_t *gpte, u64 access_bits, gfn_t gfn) { hpa_t paddr; + int dirty = *gpte & PT_DIRTY_MASK; *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; if (!dirty) @@ -240,14 +241,14 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, rmap_add(vcpu, shadow_pte); } -static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte, +static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte, u64 *shadow_pte, u64 access_bits, gfn_t gfn) { ASSERT(*shadow_pte == 0); - access_bits &= guest_pte; - *shadow_pte = (guest_pte & PT_PTE_COPY_MASK); - FNAME(set_pte_common)(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK, - guest_pte & PT_DIRTY_MASK, access_bits, gfn); + access_bits &= *gpte; + *shadow_pte = (*gpte & PT_PTE_COPY_MASK); + FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK, + gpte, access_bits, gfn); } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, @@ -261,24 +262,24 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) return; pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); - FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, + FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); } -static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde, +static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, u64 *shadow_pte, u64 access_bits, gfn_t gfn) { gpa_t gaddr; ASSERT(*shadow_pte == 0); - access_bits &= guest_pde; + access_bits &= *gpde; gaddr = (gpa_t)gfn << PAGE_SHIFT; if (PTTYPE == 32 && is_cpuid_PSE36()) - gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) << + gaddr |= (*gpde & PT32_DIR_PSE36_MASK) << (32 - PT32_DIR_PSE36_SHIFT); - *shadow_pte = guest_pde & PT_PTE_COPY_MASK; + *shadow_pte = *gpde & PT_PTE_COPY_MASK; FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, - guest_pde & PT_DIRTY_MASK, access_bits, gfn); + gpde, access_bits, gfn); } /* @@ -349,11 +350,11 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (walker->level == PT_DIRECTORY_LEVEL) { if (prev_shadow_ent) *prev_shadow_ent |= PT_SHADOW_PS_MARK; - FNAME(set_pde)(vcpu, *guest_ent, shadow_ent, + FNAME(set_pde)(vcpu, guest_ent, shadow_ent, walker->inherited_ar, walker->gfn); } else { ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); - FNAME(set_pte)(vcpu, *guest_ent, shadow_ent, + FNAME(set_pte)(vcpu, guest_ent, shadow_ent, walker->inherited_ar, walker->gfn); } -- cgit v1.2.3-70-g09d2 From 63b1ad24d2695db3ec1cc8b10760e130e1a1f04b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 11:56:54 +0300 Subject: KVM: MMU: Fold fix_read_pf() into set_pte_common() Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 17 ----------------- drivers/kvm/paging_tmpl.h | 34 +++++++++++++++++++++++----------- 2 files changed, 23 insertions(+), 28 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 
a7631502f22..2079d69f186 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -972,23 +972,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu, kvm_arch_ops->inject_page_fault(vcpu, addr, err_code); } -static inline int fix_read_pf(u64 *shadow_ent) -{ - if ((*shadow_ent & PT_SHADOW_USER_MASK) && - !(*shadow_ent & PT_USER_MASK)) { - /* - * If supervisor write protect is disabled, we shadow kernel - * pages as user pages so we can trap the write access. - */ - *shadow_ent |= PT_USER_MASK; - *shadow_ent &= ~PT_WRITABLE_MASK; - - return 1; - - } - return 0; -} - static void paging_free(struct kvm_vcpu *vcpu) { nonpaging_free(vcpu); diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 7e998d19384..869582befaf 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -197,6 +197,7 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, gpa_t gaddr, pt_element_t *gpte, u64 access_bits, + int write_fault, gfn_t gfn) { hpa_t paddr; @@ -219,6 +220,17 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, *shadow_pte |= paddr; + if (!write_fault && (*shadow_pte & PT_SHADOW_USER_MASK) && + !(*shadow_pte & PT_USER_MASK)) { + /* + * If supervisor write protect is disabled, we shadow kernel + * pages as user pages so we can trap the write access. + */ + *shadow_pte |= PT_USER_MASK; + *shadow_pte &= ~PT_WRITABLE_MASK; + access_bits &= ~PT_WRITABLE_MASK; + } + if (access_bits & PT_WRITABLE_MASK) { struct kvm_mmu_page *shadow; @@ -242,13 +254,14 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, } static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte, - u64 *shadow_pte, u64 access_bits, gfn_t gfn) + u64 *shadow_pte, u64 access_bits, + int write_fault, gfn_t gfn) { ASSERT(*shadow_pte == 0); access_bits &= *gpte; *shadow_pte = (*gpte & PT_PTE_COPY_MASK); FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK, - gpte, access_bits, gfn); + gpte, access_bits, write_fault, gfn); } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, @@ -262,12 +275,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) return; pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); - FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, + FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0, (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); } static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, - u64 *shadow_pte, u64 access_bits, gfn_t gfn) + u64 *shadow_pte, u64 access_bits, int write_fault, + gfn_t gfn) { gpa_t gaddr; @@ -279,14 +293,14 @@ static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, (32 - PT32_DIR_PSE36_SHIFT); *shadow_pte = *gpde & PT_PTE_COPY_MASK; FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, - gpde, access_bits, gfn); + gpde, access_bits, write_fault, gfn); } /* * Fetch a shadow pte for a specific level in the paging hierarchy. 
*/ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, - struct guest_walker *walker) + struct guest_walker *walker, int write_fault) { hpa_t shadow_addr; int level; @@ -351,12 +365,12 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (prev_shadow_ent) *prev_shadow_ent |= PT_SHADOW_PS_MARK; FNAME(set_pde)(vcpu, guest_ent, shadow_ent, - walker->inherited_ar, walker->gfn); + walker->inherited_ar, write_fault, walker->gfn); } else { ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); FNAME(set_pte)(vcpu, guest_ent, shadow_ent, walker->inherited_ar, - walker->gfn); + write_fault, walker->gfn); } return shadow_ent; } @@ -489,7 +503,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, return 0; } - shadow_pte = FNAME(fetch)(vcpu, addr, &walker); + shadow_pte = FNAME(fetch)(vcpu, addr, &walker, write_fault); pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__, shadow_pte, *shadow_pte); @@ -499,8 +513,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, if (write_fault) fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr, user_fault, &write_pt); - else - fixed = fix_read_pf(shadow_pte); pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__, shadow_pte, *shadow_pte); -- cgit v1.2.3-70-g09d2 From 97a0a01ea9229e4f3f0f06e0584227e9687159a5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 15:08:29 +0300 Subject: KVM: MMU: Fold fix_write_pf() into set_pte_common() This prevents some work from being performed twice, and, more importantly, reduces the number of places where we modify shadow ptes. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 11 +++ drivers/kvm/paging_tmpl.h | 168 ++++++++++++++++------------------------------ 2 files changed, 68 insertions(+), 111 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 2079d69f186..3cdbf687df2 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -731,6 +731,17 @@ static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) return r; } +static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + struct kvm_mmu_page *page; + + while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { + pgprintk("%s: zap %lx %x\n", + __FUNCTION__, gfn, page->role.word); + kvm_mmu_zap_page(vcpu, page); + } +} + static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) { int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT)); diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 869582befaf..c0672038555 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -197,11 +197,26 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, gpa_t gaddr, pt_element_t *gpte, u64 access_bits, + int user_fault, int write_fault, + int *ptwrite, + struct guest_walker *walker, gfn_t gfn) { hpa_t paddr; int dirty = *gpte & PT_DIRTY_MASK; + int was_rmapped = is_rmap_pte(*shadow_pte); + + pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d" + " user_fault %d gfn %lx\n", + __FUNCTION__, *shadow_pte, (u64)*gpte, access_bits, + write_fault, user_fault, gfn); + + if (write_fault && !dirty) { + *gpte |= PT_DIRTY_MASK; + dirty = 1; + FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); + } *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; if (!dirty) @@ -209,7 +224,9 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); - *shadow_pte |= access_bits; + *shadow_pte |= PT_PRESENT_MASK; + if (access_bits & PT_USER_MASK) + 
*shadow_pte |= PT_USER_MASK; if (is_error_hpa(paddr)) { *shadow_pte |= gaddr; @@ -231,37 +248,50 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, access_bits &= ~PT_WRITABLE_MASK; } - if (access_bits & PT_WRITABLE_MASK) { + if ((access_bits & PT_WRITABLE_MASK) + || (write_fault && !is_write_protection(vcpu) && !user_fault)) { struct kvm_mmu_page *shadow; + *shadow_pte |= PT_WRITABLE_MASK; + if (user_fault) { + mmu_unshadow(vcpu, gfn); + goto unshadowed; + } + shadow = kvm_mmu_lookup_page(vcpu, gfn); if (shadow) { pgprintk("%s: found shadow page for %lx, marking ro\n", __FUNCTION__, gfn); access_bits &= ~PT_WRITABLE_MASK; if (is_writeble_pte(*shadow_pte)) { - *shadow_pte &= ~PT_WRITABLE_MASK; - kvm_arch_ops->tlb_flush(vcpu); + *shadow_pte &= ~PT_WRITABLE_MASK; + kvm_arch_ops->tlb_flush(vcpu); } + if (write_fault) + *ptwrite = 1; } } +unshadowed: + if (access_bits & PT_WRITABLE_MASK) mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); - rmap_add(vcpu, shadow_pte); + if (!was_rmapped) + rmap_add(vcpu, shadow_pte); } static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte, u64 *shadow_pte, u64 access_bits, - int write_fault, gfn_t gfn) + int user_fault, int write_fault, int *ptwrite, + struct guest_walker *walker, gfn_t gfn) { - ASSERT(*shadow_pte == 0); access_bits &= *gpte; - *shadow_pte = (*gpte & PT_PTE_COPY_MASK); + *shadow_pte |= (*gpte & PT_PTE_COPY_MASK); FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK, - gpte, access_bits, write_fault, gfn); + gpte, access_bits, user_fault, write_fault, + ptwrite, walker, gfn); } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, @@ -276,31 +306,34 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, return; pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0, + 0, NULL, NULL, (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); } static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, - u64 *shadow_pte, u64 access_bits, int write_fault, - gfn_t gfn) + u64 *shadow_pte, u64 access_bits, + int user_fault, int write_fault, int *ptwrite, + struct guest_walker *walker, gfn_t gfn) { gpa_t gaddr; - ASSERT(*shadow_pte == 0); access_bits &= *gpde; gaddr = (gpa_t)gfn << PAGE_SHIFT; if (PTTYPE == 32 && is_cpuid_PSE36()) gaddr |= (*gpde & PT32_DIR_PSE36_MASK) << (32 - PT32_DIR_PSE36_SHIFT); - *shadow_pte = *gpde & PT_PTE_COPY_MASK; + *shadow_pte |= *gpde & PT_PTE_COPY_MASK; FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, - gpde, access_bits, write_fault, gfn); + gpde, access_bits, user_fault, write_fault, + ptwrite, walker, gfn); } /* * Fetch a shadow pte for a specific level in the paging hierarchy. 
*/ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, - struct guest_walker *walker, int write_fault) + struct guest_walker *walker, + int user_fault, int write_fault, int *ptwrite) { hpa_t shadow_addr; int level; @@ -330,7 +363,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, shadow_ent = ((u64 *)__va(shadow_addr)) + index; if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { if (level == PT_PAGE_TABLE_LEVEL) - return shadow_ent; + break; shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; prev_shadow_ent = shadow_ent; continue; @@ -365,94 +398,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (prev_shadow_ent) *prev_shadow_ent |= PT_SHADOW_PS_MARK; FNAME(set_pde)(vcpu, guest_ent, shadow_ent, - walker->inherited_ar, write_fault, walker->gfn); + walker->inherited_ar, user_fault, write_fault, + ptwrite, walker, walker->gfn); } else { ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); FNAME(set_pte)(vcpu, guest_ent, shadow_ent, - walker->inherited_ar, - write_fault, walker->gfn); + walker->inherited_ar, user_fault, write_fault, + ptwrite, walker, walker->gfn); } return shadow_ent; } -/* - * The guest faulted for write. We need to - * - * - check write permissions - * - update the guest pte dirty bit - * - update our own dirty page tracking structures - */ -static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu, - u64 *shadow_ent, - struct guest_walker *walker, - gva_t addr, - int user, - int *write_pt) -{ - pt_element_t *guest_ent; - int writable_shadow; - gfn_t gfn; - struct kvm_mmu_page *page; - - if (is_writeble_pte(*shadow_ent)) - return !user || (*shadow_ent & PT_USER_MASK); - - writable_shadow = *shadow_ent & PT_SHADOW_WRITABLE_MASK; - if (user) { - /* - * User mode access. Fail if it's a kernel page or a read-only - * page. - */ - if (!(*shadow_ent & PT_SHADOW_USER_MASK) || !writable_shadow) - return 0; - ASSERT(*shadow_ent & PT_USER_MASK); - } else - /* - * Kernel mode access. Fail if it's a read-only page and - * supervisor write protection is enabled. - */ - if (!writable_shadow) { - if (is_write_protection(vcpu)) - return 0; - *shadow_ent &= ~PT_USER_MASK; - } - - guest_ent = walker->ptep; - - if (!is_present_pte(*guest_ent)) { - *shadow_ent = 0; - return 0; - } - - gfn = walker->gfn; - - if (user) { - /* - * Usermode page faults won't be for page table updates. - */ - while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { - pgprintk("%s: zap %lx %x\n", - __FUNCTION__, gfn, page->role.word); - kvm_mmu_zap_page(vcpu, page); - } - } else if (kvm_mmu_lookup_page(vcpu, gfn)) { - pgprintk("%s: found shadow page for %lx, marking ro\n", - __FUNCTION__, gfn); - mark_page_dirty(vcpu->kvm, gfn); - FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); - *guest_ent |= PT_DIRTY_MASK; - *write_pt = 1; - return 0; - } - mark_page_dirty(vcpu->kvm, gfn); - *shadow_ent |= PT_WRITABLE_MASK; - FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); - *guest_ent |= PT_DIRTY_MASK; - rmap_add(vcpu, shadow_ent); - - return 1; -} - /* * Page fault handler. 
There are several causes for a page fault: * - there is no shadow pte for the guest pte @@ -475,7 +431,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, int fetch_fault = error_code & PFERR_FETCH_MASK; struct guest_walker walker; u64 *shadow_pte; - int fixed; int write_pt = 0; int r; @@ -503,19 +458,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, return 0; } - shadow_pte = FNAME(fetch)(vcpu, addr, &walker, write_fault); - pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__, - shadow_pte, *shadow_pte); - - /* - * Update the shadow pte. - */ - if (write_fault) - fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr, - user_fault, &write_pt); - - pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__, - shadow_pte, *shadow_pte); + shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, + &write_pt); + pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, + shadow_pte, *shadow_pte, write_pt); FNAME(release_walker)(&walker); -- cgit v1.2.3-70-g09d2 From a18de5a403f9b5010527b2e7b05049b539b4facd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 15:14:09 +0300 Subject: KVM: Move shadow pte modifications from set_pte/set_pde to set_pde_common() We want all shadow pte modifications in one place. Signed-off-by: Avi Kivity --- drivers/kvm/paging_tmpl.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index c0672038555..35f264f346d 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -218,6 +218,7 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); } + *shadow_pte |= *gpte & PT_PTE_COPY_MASK; *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; if (!dirty) access_bits &= ~PT_WRITABLE_MASK; @@ -288,7 +289,6 @@ static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte, struct guest_walker *walker, gfn_t gfn) { access_bits &= *gpte; - *shadow_pte |= (*gpte & PT_PTE_COPY_MASK); FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK, gpte, access_bits, user_fault, write_fault, ptwrite, walker, gfn); @@ -322,7 +322,6 @@ static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, if (PTTYPE == 32 && is_cpuid_PSE36()) gaddr |= (*gpde & PT32_DIR_PSE36_MASK) << (32 - PT32_DIR_PSE36_SHIFT); - *shadow_pte |= *gpde & PT_PTE_COPY_MASK; FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, gpde, access_bits, user_fault, write_fault, ptwrite, walker, gfn); -- cgit v1.2.3-70-g09d2 From 0d551bb698e1328f685ae3611c4a4a96f41bef97 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 15:23:35 +0300 Subject: KVM: Make shadow pte updates atomic With guest smp, a second vcpu might see partial updates when the first vcpu services a page fault. So delay all updates until we have figured out what the pte should look like. Note that on i386, this is still not completely atomic as a 64-bit write will be split into two on a 32-bit machine. 
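The shape of the fix, in miniature (paraphrased from the diff below): every modification is applied to a local copy, and the shadow pte is published with one store at the end instead of being patched bit by bit in place:

        u64 spte = *shadow_pte;         /* work on a private copy... */
        spte |= PT_PRESENT_MASK;        /* ...accumulate all the updates... */
        /* ... */
        *shadow_pte = spte;             /* ...publish with a single store */

The residual i386 tearing noted above is closed by the following patch, which routes the store through set_64bit() (cmpxchg8b-based on i386) and makes KVM depend on X86_CMPXCHG64 || 64BIT.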
Signed-off-by: Avi Kivity --- drivers/kvm/paging_tmpl.h | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 35f264f346d..397a4039eaa 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -205,11 +205,12 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, { hpa_t paddr; int dirty = *gpte & PT_DIRTY_MASK; - int was_rmapped = is_rmap_pte(*shadow_pte); + u64 spte = *shadow_pte; + int was_rmapped = is_rmap_pte(spte); pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d" " user_fault %d gfn %lx\n", - __FUNCTION__, *shadow_pte, (u64)*gpte, access_bits, + __FUNCTION__, spte, (u64)*gpte, access_bits, write_fault, user_fault, gfn); if (write_fault && !dirty) { @@ -218,34 +219,35 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); } - *shadow_pte |= *gpte & PT_PTE_COPY_MASK; - *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; + spte |= *gpte & PT_PTE_COPY_MASK; + spte |= access_bits << PT_SHADOW_BITS_OFFSET; if (!dirty) access_bits &= ~PT_WRITABLE_MASK; paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); - *shadow_pte |= PT_PRESENT_MASK; + spte |= PT_PRESENT_MASK; if (access_bits & PT_USER_MASK) - *shadow_pte |= PT_USER_MASK; + spte |= PT_USER_MASK; if (is_error_hpa(paddr)) { - *shadow_pte |= gaddr; - *shadow_pte |= PT_SHADOW_IO_MARK; - *shadow_pte &= ~PT_PRESENT_MASK; + spte |= gaddr; + spte |= PT_SHADOW_IO_MARK; + spte &= ~PT_PRESENT_MASK; + *shadow_pte = spte; return; } - *shadow_pte |= paddr; + spte |= paddr; - if (!write_fault && (*shadow_pte & PT_SHADOW_USER_MASK) && - !(*shadow_pte & PT_USER_MASK)) { + if (!write_fault && (spte & PT_SHADOW_USER_MASK) && + !(spte & PT_USER_MASK)) { /* * If supervisor write protect is disabled, we shadow kernel * pages as user pages so we can trap the write access. 
*/ - *shadow_pte |= PT_USER_MASK; - *shadow_pte &= ~PT_WRITABLE_MASK; + spte |= PT_USER_MASK; + spte &= ~PT_WRITABLE_MASK; access_bits &= ~PT_WRITABLE_MASK; } @@ -253,7 +255,7 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, || (write_fault && !is_write_protection(vcpu) && !user_fault)) { struct kvm_mmu_page *shadow; - *shadow_pte |= PT_WRITABLE_MASK; + spte |= PT_WRITABLE_MASK; if (user_fault) { mmu_unshadow(vcpu, gfn); goto unshadowed; @@ -264,8 +266,8 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, pgprintk("%s: found shadow page for %lx, marking ro\n", __FUNCTION__, gfn); access_bits &= ~PT_WRITABLE_MASK; - if (is_writeble_pte(*shadow_pte)) { - *shadow_pte &= ~PT_WRITABLE_MASK; + if (is_writeble_pte(spte)) { + spte &= ~PT_WRITABLE_MASK; kvm_arch_ops->tlb_flush(vcpu); } if (write_fault) @@ -278,6 +280,7 @@ unshadowed: if (access_bits & PT_WRITABLE_MASK) mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); + *shadow_pte = spte; page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); if (!was_rmapped) rmap_add(vcpu, shadow_pte); -- cgit v1.2.3-70-g09d2 From e663ee64aefc57f7eff7325142206c4ea0200be8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 15:46:04 +0300 Subject: KVM: MMU: Make setting shadow ptes atomic on i386 Signed-off-by: Avi Kivity --- drivers/kvm/Kconfig | 1 + drivers/kvm/mmu.c | 14 ++++++++++++-- drivers/kvm/paging_tmpl.h | 4 ++-- 3 files changed, 15 insertions(+), 4 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig index 2f661e5f0da..33fa28a8c19 100644 --- a/drivers/kvm/Kconfig +++ b/drivers/kvm/Kconfig @@ -11,6 +11,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on X86 && EXPERIMENTAL + depends on X86_CMPXCHG64 || 64BIT ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. 
You will need a fairly recent diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 3cdbf687df2..f24b540148a 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -22,6 +22,7 @@ #include #include #include +#include <asm/cmpxchg.h> #include "vmx.h" #include "kvm.h" @@ -204,6 +205,15 @@ static int is_rmap_pte(u64 pte) == (PT_WRITABLE_MASK | PT_PRESENT_MASK); } +static void set_shadow_pte(u64 *sptep, u64 spte) +{ +#ifdef CONFIG_X86_64 + set_64bit((unsigned long *)sptep, spte); +#else + set_64bit((unsigned long long *)sptep, spte); +#endif +} + static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, struct kmem_cache *base_cache, int min, gfp_t gfp_flags) @@ -446,7 +456,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); rmap_remove(vcpu, spte); kvm_arch_ops->tlb_flush(vcpu); - *spte &= ~(u64)PT_WRITABLE_MASK; + set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); } } @@ -699,7 +709,7 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu, } BUG_ON(!parent_pte); kvm_mmu_put_page(vcpu, page, parent_pte); - *parent_pte = 0; + set_shadow_pte(parent_pte, 0); } kvm_mmu_page_unlink_children(vcpu, page); if (!page->root_count) { diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 397a4039eaa..fabc2c9093c 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -234,7 +234,7 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, spte |= gaddr; spte |= PT_SHADOW_IO_MARK; spte &= ~PT_PRESENT_MASK; - *shadow_pte = spte; + set_shadow_pte(shadow_pte, spte); return; } @@ -280,7 +280,7 @@ unshadowed: if (access_bits & PT_WRITABLE_MASK) mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); - *shadow_pte = spte; + set_shadow_pte(shadow_pte, spte); page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); if (!was_rmapped) rmap_add(vcpu, shadow_pte); -- cgit v1.2.3-70-g09d2 From 4436d466219a6a7874ebc19eb6523c3a9a280dcc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 17:17:06 +0300 Subject: KVM: MMU: Remove cr0.wp tricks No longer needed as we do everything in one place. Signed-off-by: Avi Kivity --- drivers/kvm/paging_tmpl.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index fabc2c9093c..59b4cb29e0f 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -240,17 +240,6 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, spte |= paddr; - if (!write_fault && (spte & PT_SHADOW_USER_MASK) && - !(spte & PT_USER_MASK)) { - /* - * If supervisor write protect is disabled, we shadow kernel - * pages as user pages so we can trap the write access. - */ - spte |= PT_USER_MASK; - spte &= ~PT_WRITABLE_MASK; - access_bits &= ~PT_WRITABLE_MASK; - } - if ((access_bits & PT_WRITABLE_MASK) || (write_fault && !is_write_protection(vcpu) && !user_fault)) { struct kvm_mmu_page *shadow; -- cgit v1.2.3-70-g09d2 From fd97dc516c372982f9c3637e20b131e1f55ac2f6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 18:20:14 +0300 Subject: KVM: MMU: Simplify accessed/dirty/present/nx bit handling Always set the accessed and dirty bit (since having them cleared causes a read-modify-write cycle), always set the present bit, and copy the nx bit from the guest.
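The heart of the change is two lines of set_pte_common(), visible in the diff below (comments added here): a shadow pte is created present, accessed and dirty, so the processor never has to perform a locked read-modify-write on it to set A or D, and nx is the only bit copied through from the guest:

        spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
        spte |= *gpte & PT64_NX_MASK;   /* only nx is taken from the guest */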
Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 5 ----- drivers/kvm/paging_tmpl.h | 7 ++----- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index f24b540148a..b47391ffe54 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -91,11 +91,6 @@ static int dbg = 1; #define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) -#define PT32_PTE_COPY_MASK \ - (PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_GLOBAL_MASK) - -#define PT64_PTE_COPY_MASK (PT64_NX_MASK | PT32_PTE_COPY_MASK) - #define PT_FIRST_AVAIL_BITS_SHIFT 9 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 59b4cb29e0f..b17a4b783cd 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -31,7 +31,6 @@ #define PT_INDEX(addr, level) PT64_INDEX(addr, level) #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) - #define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS 4 #else @@ -46,7 +45,6 @@ #define PT_INDEX(addr, level) PT32_INDEX(addr, level) #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) - #define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK #define PT_MAX_FULL_LEVELS 2 #else #error Invalid PTTYPE value @@ -219,7 +217,8 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); } - spte |= *gpte & PT_PTE_COPY_MASK; + spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK; + spte |= *gpte & PT64_NX_MASK; spte |= access_bits << PT_SHADOW_BITS_OFFSET; if (!dirty) access_bits &= ~PT_WRITABLE_MASK; @@ -495,7 +494,5 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) #undef PT_INDEX #undef SHADOW_PT_INDEX #undef PT_LEVEL_MASK -#undef PT_PTE_COPY_MASK -#undef PT_NON_PTE_COPY_MASK #undef PT_DIR_BASE_ADDR_MASK #undef PT_MAX_FULL_LEVELS -- cgit v1.2.3-70-g09d2 From b64b3763a5b3868e85330c891e1a30189dcde9b1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 18:24:09 +0300 Subject: KVM: MMU: Don't cache guest access bits in the shadow page table This was once used to avoid accessing the guest pte when upgrading the shadow pte from read-only to read-write. But usually we need to set the guest pte dirty or accessed bits anyway, so this wasn't really exploited. 
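For reference, the cache being removed parked the guest's writable/user bits in software-available spte bits — bits 10 and 11, per the PT_SHADOW_WRITABLE_SHIFT and PT_SHADOW_USER_SHIFT definitions deleted from mmu.c below. The single line dropped from set_pte_common() shows the encoding:

        /* mirror guest access bits into avail bits 10-11 of the spte */
        spte |= access_bits << PT_SHADOW_BITS_OFFSET;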
Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 8 -------- drivers/kvm/paging_tmpl.h | 1 - 2 files changed, 9 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index b47391ffe54..986d01294f3 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -97,14 +97,6 @@ static int dbg = 1; #define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) #define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) -#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1) -#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT) - -#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1) -#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT)) - -#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT) - #define VALID_PAGE(x) ((x) != INVALID_PAGE) #define PT64_LEVEL_BITS 9 diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index b17a4b783cd..adc1206cf65 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -219,7 +219,6 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK; spte |= *gpte & PT64_NX_MASK; - spte |= access_bits << PT_SHADOW_BITS_OFFSET; if (!dirty) access_bits &= ~PT_WRITABLE_MASK; -- cgit v1.2.3-70-g09d2 From bd2b2baa5c5fbb08b4b0df7508ff419407f7ece6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 May 2007 18:28:51 +0300 Subject: KVM: MMU: Remove unused large page marker This has not been used for some time, as the same information is available in the page header. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 1 - drivers/kvm/paging_tmpl.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'drivers/kvm/paging_tmpl.h') diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 986d01294f3..283df031b03 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -94,7 +94,6 @@ static int dbg = 1; #define PT_FIRST_AVAIL_BITS_SHIFT 9 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 -#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) #define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) #define VALID_PAGE(x) ((x) != INVALID_PAGE) diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index adc1206cf65..a7c5cb0319e 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -384,8 +384,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, } if (walker->level == PT_DIRECTORY_LEVEL) { - if (prev_shadow_ent) - *prev_shadow_ent |= PT_SHADOW_PS_MARK; FNAME(set_pde)(vcpu, guest_ent, shadow_ent, walker->inherited_ar, user_fault, write_fault, ptwrite, walker, walker->gfn); -- cgit v1.2.3-70-g09d2