diff options
Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r-- | arch/x86/xen/mmu.c | 450 |
1 files changed, 75 insertions, 375 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 44924e551fd..3f6f3347aa1 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -46,6 +46,7 @@ #include <linux/module.h> #include <linux/gfp.h> #include <linux/memblock.h> +#include <linux/seq_file.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -173,371 +174,6 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ */ #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) -/* - * Xen leaves the responsibility for maintaining p2m mappings to the - * guests themselves, but it must also access and update the p2m array - * during suspend/resume when all the pages are reallocated. - * - * The p2m table is logically a flat array, but we implement it as a - * three-level tree to allow the address space to be sparse. - * - * Xen - * | - * p2m_top p2m_top_mfn - * / \ / \ - * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn - * / \ / \ / / - * p2m p2m p2m p2m p2m p2m p2m ... - * - * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. - * - * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the - * maximum representable pseudo-physical address space is: - * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages - * - * P2M_PER_PAGE depends on the architecture, as a mfn is always - * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to - * 512 and 1024 entries respectively. - */ - -unsigned long xen_max_p2m_pfn __read_mostly; - -#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) -#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) -#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) - -#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) - -/* Placeholders for holes in the address space */ -static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); - -static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); - -RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); -RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); - -static inline unsigned p2m_top_index(unsigned long pfn) -{ - BUG_ON(pfn >= MAX_P2M_PFN); - return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); -} - -static inline unsigned p2m_mid_index(unsigned long pfn) -{ - return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; -} - -static inline unsigned p2m_index(unsigned long pfn) -{ - return pfn % P2M_PER_PAGE; -} - -static void p2m_top_init(unsigned long ***top) -{ - unsigned i; - - for (i = 0; i < P2M_TOP_PER_PAGE; i++) - top[i] = p2m_mid_missing; -} - -static void p2m_top_mfn_init(unsigned long *top) -{ - unsigned i; - - for (i = 0; i < P2M_TOP_PER_PAGE; i++) - top[i] = virt_to_mfn(p2m_mid_missing_mfn); -} - -static void p2m_top_mfn_p_init(unsigned long **top) -{ - unsigned i; - - for (i = 0; i < P2M_TOP_PER_PAGE; i++) - top[i] = p2m_mid_missing_mfn; -} - -static void p2m_mid_init(unsigned long **mid) -{ - unsigned i; - - for (i = 0; i < P2M_MID_PER_PAGE; i++) - mid[i] = p2m_missing; -} - -static void p2m_mid_mfn_init(unsigned long *mid) -{ - unsigned i; - - for (i = 0; i < P2M_MID_PER_PAGE; i++) - mid[i] = virt_to_mfn(p2m_missing); -} - -static void p2m_init(unsigned long *p2m) -{ - unsigned i; - - for (i = 0; i < P2M_MID_PER_PAGE; i++) - p2m[i] = INVALID_P2M_ENTRY; -} - -/* - * Build the parallel p2m_top_mfn and p2m_mid_mfn structures - * - * This is called both at boot time, and after resuming from suspend: - * - At boot time we're called very early, and must use extend_brk() - * to allocate memory. - * - * - After resume we're called from within stop_machine, but the mfn - * tree should alreay be completely allocated. - */ -void xen_build_mfn_list_list(void) -{ - unsigned long pfn; - - /* Pre-initialize p2m_top_mfn to be completely missing */ - if (p2m_top_mfn == NULL) { - p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_mfn_init(p2m_mid_missing_mfn); - - p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_top_mfn_p_init(p2m_top_mfn_p); - - p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_top_mfn_init(p2m_top_mfn); - } else { - /* Reinitialise, mfn's all change after migration */ - p2m_mid_mfn_init(p2m_mid_missing_mfn); - } - - for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); - unsigned long **mid; - unsigned long *mid_mfn_p; - - mid = p2m_top[topidx]; - mid_mfn_p = p2m_top_mfn_p[topidx]; - - /* Don't bother allocating any mfn mid levels if - * they're just missing, just update the stored mfn, - * since all could have changed over a migrate. - */ - if (mid == p2m_mid_missing) { - BUG_ON(mididx); - BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); - p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); - pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; - continue; - } - - if (mid_mfn_p == p2m_mid_missing_mfn) { - /* - * XXX boot-time only! We should never find - * missing parts of the mfn tree after - * runtime. extend_brk() will BUG if we call - * it too late. - */ - mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_mfn_init(mid_mfn_p); - - p2m_top_mfn_p[topidx] = mid_mfn_p; - } - - p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); - mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); - } -} - -void xen_setup_mfn_list_list(void) -{ - BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); - - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = - virt_to_mfn(p2m_top_mfn); - HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; -} - -/* Set up p2m_top to point to the domain-builder provided p2m pages */ -void __init xen_build_dynamic_phys_to_machine(void) -{ - unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; - unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); - unsigned long pfn; - - xen_max_p2m_pfn = max_pfn; - - p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_init(p2m_missing); - - p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(p2m_mid_missing); - - p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_top_init(p2m_top); - - /* - * The domain builder gives us a pre-constructed p2m array in - * mfn_list for all the pages initially given to us, so we just - * need to graft that into our tree structure. - */ - for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); - - if (p2m_top[topidx] == p2m_mid_missing) { - unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(mid); - - p2m_top[topidx] = mid; - } - - p2m_top[topidx][mididx] = &mfn_list[pfn]; - } -} - -unsigned long get_phys_to_machine(unsigned long pfn) -{ - unsigned topidx, mididx, idx; - - if (unlikely(pfn >= MAX_P2M_PFN)) - return INVALID_P2M_ENTRY; - - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); - - return p2m_top[topidx][mididx][idx]; -} -EXPORT_SYMBOL_GPL(get_phys_to_machine); - -static void *alloc_p2m_page(void) -{ - return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); -} - -static void free_p2m_page(void *p) -{ - free_page((unsigned long)p); -} - -/* - * Fully allocate the p2m structure for a given pfn. We need to check - * that both the top and mid levels are allocated, and make sure the - * parallel mfn tree is kept in sync. We may race with other cpus, so - * the new pages are installed with cmpxchg; if we lose the race then - * simply free the page we allocated and use the one that's there. - */ -static bool alloc_p2m(unsigned long pfn) -{ - unsigned topidx, mididx; - unsigned long ***top_p, **mid; - unsigned long *top_mfn_p, *mid_mfn; - - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - - top_p = &p2m_top[topidx]; - mid = *top_p; - - if (mid == p2m_mid_missing) { - /* Mid level is missing, allocate a new one */ - mid = alloc_p2m_page(); - if (!mid) - return false; - - p2m_mid_init(mid); - - if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) - free_p2m_page(mid); - } - - top_mfn_p = &p2m_top_mfn[topidx]; - mid_mfn = p2m_top_mfn_p[topidx]; - - BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); - - if (mid_mfn == p2m_mid_missing_mfn) { - /* Separately check the mid mfn level */ - unsigned long missing_mfn; - unsigned long mid_mfn_mfn; - - mid_mfn = alloc_p2m_page(); - if (!mid_mfn) - return false; - - p2m_mid_mfn_init(mid_mfn); - - missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); - mid_mfn_mfn = virt_to_mfn(mid_mfn); - if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) - free_p2m_page(mid_mfn); - else - p2m_top_mfn_p[topidx] = mid_mfn; - } - - if (p2m_top[topidx][mididx] == p2m_missing) { - /* p2m leaf page is missing */ - unsigned long *p2m; - - p2m = alloc_p2m_page(); - if (!p2m) - return false; - - p2m_init(p2m); - - if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) - free_p2m_page(p2m); - else - mid_mfn[mididx] = virt_to_mfn(p2m); - } - - return true; -} - -/* Try to install p2m mapping; fail if intermediate bits missing */ -bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - unsigned topidx, mididx, idx; - - if (unlikely(pfn >= MAX_P2M_PFN)) { - BUG_ON(mfn != INVALID_P2M_ENTRY); - return true; - } - - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); - - if (p2m_top[topidx][mididx] == p2m_missing) - return mfn == INVALID_P2M_ENTRY; - - p2m_top[topidx][mididx][idx] = mfn; - - return true; -} - -bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return true; - } - - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { - if (!alloc_p2m(pfn)) - return false; - - if (!__set_phys_to_machine(pfn, mfn)) - return false; - } - - return true; -} - unsigned long arbitrary_virt_to_mfn(void *vaddr) { xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); @@ -566,6 +202,7 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) offset = address & ~PAGE_MASK; return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); } +EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); void make_lowmem_page_readonly(void *vaddr) { @@ -780,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (val & _PAGE_PRESENT) { unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; pteval_t flags = val & PTE_FLAGS_MASK; - unsigned long mfn = pfn_to_mfn(pfn); + unsigned long mfn; + if (!xen_feature(XENFEAT_auto_translated_physmap)) + mfn = get_phys_to_machine(pfn); + else + mfn = pfn; /* * If there's no mfn for the pfn, then just create an * empty non-present pte. Unfortunately this loses @@ -791,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (unlikely(mfn == INVALID_P2M_ENTRY)) { mfn = 0; flags = 0; + } else { + /* + * Paramount to do this test _after_ the + * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY & + * IDENTITY_FRAME_BIT resolves to true. + */ + mfn &= ~FOREIGN_FRAME_BIT; + if (mfn & IDENTITY_FRAME_BIT) { + mfn &= ~IDENTITY_FRAME_BIT; + flags |= _PAGE_IOMAP; + } } - val = ((pteval_t)mfn << PAGE_SHIFT) | flags; } @@ -896,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); +#ifdef CONFIG_XEN_DEBUG +pte_t xen_make_pte_debug(pteval_t pte) +{ + phys_addr_t addr = (pte & PTE_PFN_MASK); + phys_addr_t other_addr; + bool io_page = false; + pte_t _pte; + + if (pte & _PAGE_IOMAP) + io_page = true; + + _pte = xen_make_pte(pte); + + if (!addr) + return _pte; + + if (io_page && + (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { + other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT; + WARN(addr != other_addr, + "0x%lx is using VM_IO, but it is 0x%lx!\n", + (unsigned long)addr, (unsigned long)other_addr); + } else { + pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP; + other_addr = (_pte.pte & PTE_PFN_MASK); + WARN((addr == other_addr) && (!io_page) && (!iomap_set), + "0x%lx is missing VM_IO (and wasn't fixed)!\n", + (unsigned long)addr); + } + + return _pte; +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug); +#endif + pgd_t xen_make_pgd(pgdval_t pgd) { pgd = pte_pfn_to_mfn(pgd); @@ -1350,10 +1036,9 @@ static void xen_pgd_pin(struct mm_struct *mm) */ void xen_mm_pin_all(void) { - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { if (!PagePinned(page)) { @@ -1362,7 +1047,7 @@ void xen_mm_pin_all(void) } } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } /* @@ -1463,10 +1148,9 @@ static void xen_pgd_unpin(struct mm_struct *mm) */ void xen_mm_unpin_all(void) { - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { if (PageSavePinned(page)) { @@ -1476,7 +1160,7 @@ void xen_mm_unpin_all(void) } } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) @@ -1807,7 +1491,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) * early_ioremap fixmap slot, make sure it is RO. */ if (!is_early_ioremap_ptep(ptep) && - pfn >= e820_table_start && pfn < e820_table_end) + pfn >= pgt_buf_start && pfn < pgt_buf_end) pte = pte_wrprotect(pte); return pte; @@ -2306,6 +1990,9 @@ __init void xen_ident_map_ISA(void) static __init void xen_post_allocator_init(void) { +#ifdef CONFIG_XEN_DEBUG + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); +#endif pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; @@ -2438,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, in_frames[i] = virt_to_mfn(vaddr); MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); - set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); + __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); if (out_frames) out_frames[i] = virt_to_pfn(vaddr); @@ -2717,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); #ifdef CONFIG_XEN_DEBUG_FS +static int p2m_dump_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, p2m_dump_show, NULL); +} + +static const struct file_operations p2m_dump_fops = { + .open = p2m_dump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static struct dentry *d_mmu_debug; static int __init xen_mmu_debugfs(void) @@ -2772,6 +2471,7 @@ static int __init xen_mmu_debugfs(void) debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, &mmu_stats.prot_commit_batched); + debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); return 0; } fs_initcall(xen_mmu_debugfs); |