diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-17 11:10:11 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-17 11:10:11 -0700 |
commit | fb9fc395174138983a49f2da982ed14caabbe741 (patch) | |
tree | 5d5d3643ee6853a899205613da272cc343fdc1a4 /arch/x86/xen/enlighten.c | |
parent | 0eafaae84e21ac033815cc9f33c3ae889cd7ccfe (diff) | |
parent | ace2e92e193126711cb3a83a3752b2c5b8396950 (diff) |
Merge branch 'xen-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen
* 'xen-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen:
xfs: eagerly remove vmap mappings to avoid upsetting Xen
xen: add some debug output for failed multicalls
xen: fix incorrect vcpu_register_vcpu_info hypercall argument
xen: ask the hypervisor how much space it needs reserved
xen: lock pte pages while pinning/unpinning
xen: deal with stale cr3 values when unpinning pagetables
xen: add batch completion callbacks
xen: yield to IPI target if necessary
Clean up duplicate includes in arch/i386/xen/
remove dead code in pgtable_cache_init
paravirt: clean up lazy mode handling
paravirt: refactor struct paravirt_ops into smaller pv_*_ops
Diffstat (limited to 'arch/x86/xen/enlighten.c')
-rw-r--r-- | arch/x86/xen/enlighten.c | 233 |
1 files changed, 142 insertions, 91 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 493a083f688..94c39aaf695 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -25,7 +25,6 @@ #include <linux/mm.h> #include <linux/page-flags.h> #include <linux/highmem.h> -#include <linux/smp.h> #include <xen/interface/xen.h> #include <xen/interface/physdev.h> @@ -52,11 +51,25 @@ EXPORT_SYMBOL_GPL(hypercall_page); -DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); - DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); -DEFINE_PER_CPU(unsigned long, xen_cr3); + +/* + * Note about cr3 (pagetable base) values: + * + * xen_cr3 contains the current logical cr3 value; it contains the + * last set cr3. This may not be the current effective cr3, because + * its update may be being lazily deferred. However, a vcpu looking + * at its own cr3 can use this value knowing that it everything will + * be self-consistent. + * + * xen_current_cr3 contains the actual vcpu cr3; it is set once the + * hypercall to set the vcpu cr3 is complete (so it may be a little + * out of date, but it will never be set early). If one vcpu is + * looking at another vcpu's cr3 value, it should use this variable. + */ +DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ +DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -100,7 +113,7 @@ static void __init xen_vcpu_setup(int cpu) info.mfn = virt_to_mfn(vcpup); info.offset = offset_in_page(vcpup); - printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n", + printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n", cpu, vcpup, info.mfn, info.offset); /* Check to see if the hypervisor will put the vcpu_info @@ -124,7 +137,7 @@ static void __init xen_vcpu_setup(int cpu) static void __init xen_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - paravirt_ops.name); + pv_info.name); printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); } @@ -249,29 +262,10 @@ static void xen_halt(void) xen_safe_halt(); } -static void xen_set_lazy_mode(enum paravirt_lazy_mode mode) +static void xen_leave_lazy(void) { - BUG_ON(preemptible()); - - switch (mode) { - case PARAVIRT_LAZY_NONE: - BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE); - break; - - case PARAVIRT_LAZY_MMU: - case PARAVIRT_LAZY_CPU: - BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE); - break; - - case PARAVIRT_LAZY_FLUSH: - /* flush if necessary, but don't change state */ - if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE) - xen_mc_flush(); - return; - } - + paravirt_leave_lazy(paravirt_get_lazy_mode()); xen_mc_flush(); - x86_write_percpu(xen_lazy_mode, mode); } static unsigned long xen_store_tr(void) @@ -358,7 +352,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) * loaded properly. This will go away as soon as Xen has been * modified to not save/restore %gs for normal hypercalls. */ - if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU) + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) loadsegment(gs, 0); } @@ -632,32 +626,36 @@ static unsigned long xen_read_cr3(void) return x86_read_percpu(xen_cr3); } +static void set_current_cr3(void *v) +{ + x86_write_percpu(xen_current_cr3, (unsigned long)v); +} + static void xen_write_cr3(unsigned long cr3) { + struct mmuext_op *op; + struct multicall_space mcs; + unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); + BUG_ON(preemptible()); - if (cr3 == x86_read_percpu(xen_cr3)) { - /* just a simple tlb flush */ - xen_flush_tlb(); - return; - } + mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */ + /* Update while interrupts are disabled, so its atomic with + respect to ipis */ x86_write_percpu(xen_cr3, cr3); + op = mcs.args; + op->cmd = MMUEXT_NEW_BASEPTR; + op->arg1.mfn = mfn; - { - struct mmuext_op *op; - struct multicall_space mcs = xen_mc_entry(sizeof(*op)); - unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); - - op = mcs.args; - op->cmd = MMUEXT_NEW_BASEPTR; - op->arg1.mfn = mfn; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + /* Update xen_update_cr3 once the batch has actually + been submitted. */ + xen_mc_callback(set_current_cr3, (void *)cr3); - xen_mc_issue(PARAVIRT_LAZY_CPU); - } + xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } /* Early in boot, while setting up the initial pagetable, assume @@ -668,6 +666,15 @@ static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn) make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); } +static void pin_pagetable_pfn(unsigned level, unsigned long pfn) +{ + struct mmuext_op op; + op.cmd = level; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + /* This needs to make sure the new pte page is pinned iff its being attached to a pinned pagetable. */ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) @@ -677,9 +684,10 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) if (PagePinned(virt_to_page(mm->pgd))) { SetPagePinned(page); - if (!PageHighMem(page)) + if (!PageHighMem(page)) { make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); - else + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); + } else /* make sure there are no stray mappings of this page */ kmap_flush_unused(); @@ -692,8 +700,10 @@ static void xen_release_pt(u32 pfn) struct page *page = pfn_to_page(pfn); if (PagePinned(page)) { - if (!PageHighMem(page)) + if (!PageHighMem(page)) { + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); + } } } @@ -738,7 +748,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; /* special set_pte for pagetable initialization */ - paravirt_ops.set_pte = xen_set_pte_init; + pv_mmu_ops.set_pte = xen_set_pte_init; init_mm.pgd = base; /* @@ -785,8 +795,8 @@ static __init void xen_pagetable_setup_done(pgd_t *base) { /* This will work as long as patching hasn't happened yet (which it hasn't) */ - paravirt_ops.alloc_pt = xen_alloc_pt; - paravirt_ops.set_pte = xen_set_pte; + pv_mmu_ops.alloc_pt = xen_alloc_pt; + pv_mmu_ops.set_pte = xen_set_pte; if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* @@ -808,15 +818,15 @@ static __init void xen_pagetable_setup_done(pgd_t *base) /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ { - struct mmuext_op op; + unsigned level; + #ifdef CONFIG_X86_PAE - op.cmd = MMUEXT_PIN_L3_TABLE; + level = MMUEXT_PIN_L3_TABLE; #else - op.cmd = MMUEXT_PIN_L3_TABLE; + level = MMUEXT_PIN_L2_TABLE; #endif - op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base))); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); + + pin_pagetable_pfn(level, PFN_DOWN(__pa(base))); } } @@ -833,12 +843,12 @@ void __init xen_setup_vcpu_info_placement(void) if (have_vcpu_info_placement) { printk(KERN_INFO "Xen: using vcpu_info placement\n"); - paravirt_ops.save_fl = xen_save_fl_direct; - paravirt_ops.restore_fl = xen_restore_fl_direct; - paravirt_ops.irq_disable = xen_irq_disable_direct; - paravirt_ops.irq_enable = xen_irq_enable_direct; - paravirt_ops.read_cr2 = xen_read_cr2_direct; - paravirt_ops.iret = xen_iret_direct; + pv_irq_ops.save_fl = xen_save_fl_direct; + pv_irq_ops.restore_fl = xen_restore_fl_direct; + pv_irq_ops.irq_disable = xen_irq_disable_direct; + pv_irq_ops.irq_enable = xen_irq_enable_direct; + pv_mmu_ops.read_cr2 = xen_read_cr2_direct; + pv_cpu_ops.iret = xen_iret_direct; } } @@ -850,8 +860,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, start = end = reloc = NULL; -#define SITE(x) \ - case PARAVIRT_PATCH(x): \ +#define SITE(op, x) \ + case PARAVIRT_PATCH(op.x): \ if (have_vcpu_info_placement) { \ start = (char *)xen_##x##_direct; \ end = xen_##x##_direct_end; \ @@ -860,10 +870,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, goto patch_site switch (type) { - SITE(irq_enable); - SITE(irq_disable); - SITE(save_fl); - SITE(restore_fl); + SITE(pv_irq_ops, irq_enable); + SITE(pv_irq_ops, irq_disable); + SITE(pv_irq_ops, save_fl); + SITE(pv_irq_ops, restore_fl); #undef SITE patch_site: @@ -895,26 +905,32 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, return ret; } -static const struct paravirt_ops xen_paravirt_ops __initdata = { +static const struct pv_info xen_info __initdata = { .paravirt_enabled = 1, .shared_kernel_pmd = 0, .name = "Xen", - .banner = xen_banner, +}; +static const struct pv_init_ops xen_init_ops __initdata = { .patch = xen_patch, + .banner = xen_banner, .memory_setup = xen_memory_setup, .arch_setup = xen_arch_setup, - .init_IRQ = xen_init_IRQ, .post_allocator_init = xen_mark_init_mm_pinned, +}; +static const struct pv_time_ops xen_time_ops __initdata = { .time_init = xen_time_init, + .set_wallclock = xen_set_wallclock, .get_wallclock = xen_get_wallclock, .get_cpu_khz = xen_cpu_khz, .sched_clock = xen_sched_clock, +}; +static const struct pv_cpu_ops xen_cpu_ops __initdata = { .cpuid = xen_cpuid, .set_debugreg = xen_set_debugreg, @@ -925,22 +941,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .read_cr0 = native_read_cr0, .write_cr0 = native_write_cr0, - .read_cr2 = xen_read_cr2, - .write_cr2 = xen_write_cr2, - - .read_cr3 = xen_read_cr3, - .write_cr3 = xen_write_cr3, - .read_cr4 = native_read_cr4, .read_cr4_safe = native_read_cr4_safe, .write_cr4 = xen_write_cr4, - .save_fl = xen_save_fl, - .restore_fl = xen_restore_fl, - .irq_disable = xen_irq_disable, - .irq_enable = xen_irq_enable, - .safe_halt = xen_safe_halt, - .halt = xen_halt, .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, @@ -969,6 +973,23 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .set_iopl_mask = xen_set_iopl_mask, .io_delay = xen_io_delay, + .lazy_mode = { + .enter = paravirt_enter_lazy_cpu, + .leave = xen_leave_lazy, + }, +}; + +static const struct pv_irq_ops xen_irq_ops __initdata = { + .init_IRQ = xen_init_IRQ, + .save_fl = xen_save_fl, + .restore_fl = xen_restore_fl, + .irq_disable = xen_irq_disable, + .irq_enable = xen_irq_enable, + .safe_halt = xen_safe_halt, + .halt = xen_halt, +}; + +static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC .apic_write = xen_apic_write, .apic_write_atomic = xen_apic_write, @@ -977,6 +998,17 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .setup_secondary_clock = paravirt_nop, .startup_ipi_hook = paravirt_nop, #endif +}; + +static const struct pv_mmu_ops xen_mmu_ops __initdata = { + .pagetable_setup_start = xen_pagetable_setup_start, + .pagetable_setup_done = xen_pagetable_setup_done, + + .read_cr2 = xen_read_cr2, + .write_cr2 = xen_write_cr2, + + .read_cr3 = xen_read_cr3, + .write_cr3 = xen_write_cr3, .flush_tlb_user = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb, @@ -986,9 +1018,6 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .pte_update = paravirt_nop, .pte_update_defer = paravirt_nop, - .pagetable_setup_start = xen_pagetable_setup_start, - .pagetable_setup_done = xen_pagetable_setup_done, - .alloc_pt = xen_alloc_pt_init, .release_pt = xen_release_pt, .alloc_pd = paravirt_nop, @@ -1024,7 +1053,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .dup_mmap = xen_dup_mmap, .exit_mmap = xen_exit_mmap, - .set_lazy_mode = xen_set_lazy_mode, + .lazy_mode = { + .enter = paravirt_enter_lazy_mmu, + .leave = xen_leave_lazy, + }, }; #ifdef CONFIG_SMP @@ -1080,6 +1112,17 @@ static const struct machine_ops __initdata xen_machine_ops = { }; +static void __init xen_reserve_top(void) +{ + unsigned long top = HYPERVISOR_VIRT_START; + struct xen_platform_parameters pp; + + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) + top = pp.virt_start; + + reserve_top_address(-top + 2 * PAGE_SIZE); +} + /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -1091,7 +1134,14 @@ asmlinkage void __init xen_start_kernel(void) BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0); /* Install Xen paravirt ops */ - paravirt_ops = xen_paravirt_ops; + pv_info = xen_info; + pv_init_ops = xen_init_ops; + pv_time_ops = xen_time_ops; + pv_cpu_ops = xen_cpu_ops; + pv_irq_ops = xen_irq_ops; + pv_apic_ops = xen_apic_ops; + pv_mmu_ops = xen_mmu_ops; + machine_ops = xen_machine_ops; #ifdef CONFIG_SMP @@ -1113,6 +1163,7 @@ asmlinkage void __init xen_start_kernel(void) /* keep using Xen gdt for now; no urgent need to change it */ x86_write_percpu(xen_cr3, __pa(pgd)); + x86_write_percpu(xen_current_cr3, __pa(pgd)); #ifdef CONFIG_SMP /* Don't do the full vcpu_info placement stuff until we have a @@ -1124,12 +1175,12 @@ asmlinkage void __init xen_start_kernel(void) xen_setup_vcpu_info_placement(); #endif - paravirt_ops.kernel_rpl = 1; + pv_info.kernel_rpl = 1; if (xen_feature(XENFEAT_supervisor_mode_kernel)) - paravirt_ops.kernel_rpl = 0; + pv_info.kernel_rpl = 0; /* set the limit of our address space */ - reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE); + xen_reserve_top(); /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); |