summaryrefslogtreecommitdiffstats
path: root/arch/i386/xen
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2007-10-11 11:16:51 +0200
committerThomas Gleixner <tglx@linutronix.de>2007-10-11 11:16:51 +0200
commit9702785a747aa27baf46ff504beab6528f21f2dd (patch)
treeab69d6f802f5b680c33999dc089e44982c74595d /arch/i386/xen
parent334e621a01f86d5bc25e4f742e1eaae6e2d2a97a (diff)
i386: move xen
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/i386/xen')
-rw-r--r--arch/i386/xen/Kconfig11
-rw-r--r--arch/i386/xen/Makefile4
-rw-r--r--arch/i386/xen/enlighten.c1146
-rw-r--r--arch/i386/xen/events.c591
-rw-r--r--arch/i386/xen/features.c29
-rw-r--r--arch/i386/xen/manage.c143
-rw-r--r--arch/i386/xen/mmu.c567
-rw-r--r--arch/i386/xen/mmu.h60
-rw-r--r--arch/i386/xen/multicalls.c90
-rw-r--r--arch/i386/xen/multicalls.h45
-rw-r--r--arch/i386/xen/setup.c111
-rw-r--r--arch/i386/xen/smp.c404
-rw-r--r--arch/i386/xen/time.c593
-rw-r--r--arch/i386/xen/vdso.h4
-rw-r--r--arch/i386/xen/xen-asm.S291
-rw-r--r--arch/i386/xen/xen-head.S38
-rw-r--r--arch/i386/xen/xen-ops.h71
17 files changed, 0 insertions, 4198 deletions
diff --git a/arch/i386/xen/Kconfig b/arch/i386/xen/Kconfig
deleted file mode 100644
index 9df99e1885a..00000000000
--- a/arch/i386/xen/Kconfig
+++ /dev/null
@@ -1,11 +0,0 @@
-#
-# This Kconfig describes xen options
-#
-
-config XEN
- bool "Enable support for Xen hypervisor"
- depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES
- help
- This is the Linux Xen port. Enabling this will allow the
- kernel to boot in a paravirtualized environment under the
- Xen hypervisor.
diff --git a/arch/i386/xen/Makefile b/arch/i386/xen/Makefile
deleted file mode 100644
index 343df246bd3..00000000000
--- a/arch/i386/xen/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \
- events.o time.o manage.o xen-asm.o
-
-obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c
deleted file mode 100644
index f01bfcd4bde..00000000000
--- a/arch/i386/xen/enlighten.c
+++ /dev/null
@@ -1,1146 +0,0 @@
-/*
- * Core of Xen paravirt_ops implementation.
- *
- * This file contains the xen_paravirt_ops structure itself, and the
- * implementations for:
- * - privileged instructions
- * - interrupt flags
- * - segment operations
- * - booting and setup
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/preempt.h>
-#include <linux/hardirq.h>
-#include <linux/percpu.h>
-#include <linux/delay.h>
-#include <linux/start_kernel.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/page-flags.h>
-#include <linux/highmem.h>
-#include <linux/smp.h>
-
-#include <xen/interface/xen.h>
-#include <xen/interface/physdev.h>
-#include <xen/interface/vcpu.h>
-#include <xen/interface/sched.h>
-#include <xen/features.h>
-#include <xen/page.h>
-
-#include <asm/paravirt.h>
-#include <asm/page.h>
-#include <asm/xen/hypercall.h>
-#include <asm/xen/hypervisor.h>
-#include <asm/fixmap.h>
-#include <asm/processor.h>
-#include <asm/setup.h>
-#include <asm/desc.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <asm/reboot.h>
-
-#include "xen-ops.h"
-#include "mmu.h"
-#include "multicalls.h"
-
-EXPORT_SYMBOL_GPL(hypercall_page);
-
-DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
-
-DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
-DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
-DEFINE_PER_CPU(unsigned long, xen_cr3);
-
-struct start_info *xen_start_info;
-EXPORT_SYMBOL_GPL(xen_start_info);
-
-static /* __initdata */ struct shared_info dummy_shared_info;
-
-/*
- * Point at some empty memory to start with. We map the real shared_info
- * page as soon as fixmap is up and running.
- */
-struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
-
-/*
- * Flag to determine whether vcpu info placement is available on all
- * VCPUs. We assume it is to start with, and then set it to zero on
- * the first failure. This is because it can succeed on some VCPUs
- * and not others, since it can involve hypervisor memory allocation,
- * or because the guest failed to guarantee all the appropriate
- * constraints on all VCPUs (ie buffer can't cross a page boundary).
- *
- * Note that any particular CPU may be using a placed vcpu structure,
- * but we can only optimise if the all are.
- *
- * 0: not available, 1: available
- */
-static int have_vcpu_info_placement = 1;
-
-static void __init xen_vcpu_setup(int cpu)
-{
- struct vcpu_register_vcpu_info info;
- int err;
- struct vcpu_info *vcpup;
-
- per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
-
- if (!have_vcpu_info_placement)
- return; /* already tested, not available */
-
- vcpup = &per_cpu(xen_vcpu_info, cpu);
-
- info.mfn = virt_to_mfn(vcpup);
- info.offset = offset_in_page(vcpup);
-
- printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n",
- cpu, vcpup, info.mfn, info.offset);
-
- /* Check to see if the hypervisor will put the vcpu_info
- structure where we want it, which allows direct access via
- a percpu-variable. */
- err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
-
- if (err) {
- printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
- have_vcpu_info_placement = 0;
- } else {
- /* This cpu is using the registered vcpu info, even if
- later ones fail to. */
- per_cpu(xen_vcpu, cpu) = vcpup;
-
- printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n",
- cpu, vcpup);
- }
-}
-
-static void __init xen_banner(void)
-{
- printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
- paravirt_ops.name);
- printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
-}
-
-static void xen_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- unsigned maskedx = ~0;
-
- /*
- * Mask out inconvenient features, to try and disable as many
- * unsupported kernel subsystems as possible.
- */
- if (*eax == 1)
- maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
- (1 << X86_FEATURE_ACPI) | /* disable ACPI */
- (1 << X86_FEATURE_ACC)); /* thermal monitoring */
-
- asm(XEN_EMULATE_PREFIX "cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
- *edx &= maskedx;
-}
-
-static void xen_set_debugreg(int reg, unsigned long val)
-{
- HYPERVISOR_set_debugreg(reg, val);
-}
-
-static unsigned long xen_get_debugreg(int reg)
-{
- return HYPERVISOR_get_debugreg(reg);
-}
-
-static unsigned long xen_save_fl(void)
-{
- struct vcpu_info *vcpu;
- unsigned long flags;
-
- vcpu = x86_read_percpu(xen_vcpu);
-
- /* flag has opposite sense of mask */
- flags = !vcpu->evtchn_upcall_mask;
-
- /* convert to IF type flag
- -0 -> 0x00000000
- -1 -> 0xffffffff
- */
- return (-flags) & X86_EFLAGS_IF;
-}
-
-static void xen_restore_fl(unsigned long flags)
-{
- struct vcpu_info *vcpu;
-
- /* convert from IF type flag */
- flags = !(flags & X86_EFLAGS_IF);
-
- /* There's a one instruction preempt window here. We need to
- make sure we're don't switch CPUs between getting the vcpu
- pointer and updating the mask. */
- preempt_disable();
- vcpu = x86_read_percpu(xen_vcpu);
- vcpu->evtchn_upcall_mask = flags;
- preempt_enable_no_resched();
-
- /* Doesn't matter if we get preempted here, because any
- pending event will get dealt with anyway. */
-
- if (flags == 0) {
- preempt_check_resched();
- barrier(); /* unmask then check (avoid races) */
- if (unlikely(vcpu->evtchn_upcall_pending))
- force_evtchn_callback();
- }
-}
-
-static void xen_irq_disable(void)
-{
- /* There's a one instruction preempt window here. We need to
- make sure we're don't switch CPUs between getting the vcpu
- pointer and updating the mask. */
- preempt_disable();
- x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
- preempt_enable_no_resched();
-}
-
-static void xen_irq_enable(void)
-{
- struct vcpu_info *vcpu;
-
- /* There's a one instruction preempt window here. We need to
- make sure we're don't switch CPUs between getting the vcpu
- pointer and updating the mask. */
- preempt_disable();
- vcpu = x86_read_percpu(xen_vcpu);
- vcpu->evtchn_upcall_mask = 0;
- preempt_enable_no_resched();
-
- /* Doesn't matter if we get preempted here, because any
- pending event will get dealt with anyway. */
-
- barrier(); /* unmask then check (avoid races) */
- if (unlikely(vcpu->evtchn_upcall_pending))
- force_evtchn_callback();
-}
-
-static void xen_safe_halt(void)
-{
- /* Blocking includes an implicit local_irq_enable(). */
- if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0)
- BUG();
-}
-
-static void xen_halt(void)
-{
- if (irqs_disabled())
- HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
- else
- xen_safe_halt();
-}
-
-static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
-{
- BUG_ON(preemptible());
-
- switch (mode) {
- case PARAVIRT_LAZY_NONE:
- BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
- break;
-
- case PARAVIRT_LAZY_MMU:
- case PARAVIRT_LAZY_CPU:
- BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE);
- break;
-
- case PARAVIRT_LAZY_FLUSH:
- /* flush if necessary, but don't change state */
- if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE)
- xen_mc_flush();
- return;
- }
-
- xen_mc_flush();
- x86_write_percpu(xen_lazy_mode, mode);
-}
-
-static unsigned long xen_store_tr(void)
-{
- return 0;
-}
-
-static void xen_set_ldt(const void *addr, unsigned entries)
-{
- unsigned long linear_addr = (unsigned long)addr;
- struct mmuext_op *op;
- struct multicall_space mcs = xen_mc_entry(sizeof(*op));
-
- op = mcs.args;
- op->cmd = MMUEXT_SET_LDT;
- if (linear_addr) {
- /* ldt my be vmalloced, use arbitrary_virt_to_machine */
- xmaddr_t maddr;
- maddr = arbitrary_virt_to_machine((unsigned long)addr);
- linear_addr = (unsigned long)maddr.maddr;
- }
- op->arg1.linear_addr = linear_addr;
- op->arg2.nr_ents = entries;
-
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_CPU);
-}
-
-static void xen_load_gdt(const struct Xgt_desc_struct *dtr)
-{
- unsigned long *frames;
- unsigned long va = dtr->address;
- unsigned int size = dtr->size + 1;
- unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
- int f;
- struct multicall_space mcs;
-
- /* A GDT can be up to 64k in size, which corresponds to 8192
- 8-byte entries, or 16 4k pages.. */
-
- BUG_ON(size > 65536);
- BUG_ON(va & ~PAGE_MASK);
-
- mcs = xen_mc_entry(sizeof(*frames) * pages);
- frames = mcs.args;
-
- for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
- frames[f] = virt_to_mfn(va);
- make_lowmem_page_readonly((void *)va);
- }
-
- MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
-
- xen_mc_issue(PARAVIRT_LAZY_CPU);
-}
-
-static void load_TLS_descriptor(struct thread_struct *t,
- unsigned int cpu, unsigned int i)
-{
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
- xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
- struct multicall_space mc = __xen_mc_entry(0);
-
- MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
-}
-
-static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
-{
- xen_mc_batch();
-
- load_TLS_descriptor(t, cpu, 0);
- load_TLS_descriptor(t, cpu, 1);
- load_TLS_descriptor(t, cpu, 2);
-
- xen_mc_issue(PARAVIRT_LAZY_CPU);
-
- /*
- * XXX sleazy hack: If we're being called in a lazy-cpu zone,
- * it means we're in a context switch, and %gs has just been
- * saved. This means we can zero it out to prevent faults on
- * exit from the hypervisor if the next process has no %gs.
- * Either way, it has been saved, and the new value will get
- * loaded properly. This will go away as soon as Xen has been
- * modified to not save/restore %gs for normal hypercalls.
- */
- if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU)
- loadsegment(gs, 0);
-}
-
-static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
- u32 low, u32 high)
-{
- unsigned long lp = (unsigned long)&dt[entrynum];
- xmaddr_t mach_lp = virt_to_machine(lp);
- u64 entry = (u64)high << 32 | low;
-
- preempt_disable();
-
- xen_mc_flush();
- if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
- BUG();
-
- preempt_enable();
-}
-
-static int cvt_gate_to_trap(int vector, u32 low, u32 high,
- struct trap_info *info)
-{
- u8 type, dpl;
-
- type = (high >> 8) & 0x1f;
- dpl = (high >> 13) & 3;
-
- if (type != 0xf && type != 0xe)
- return 0;
-
- info->vector = vector;
- info->address = (high & 0xffff0000) | (low & 0x0000ffff);
- info->cs = low >> 16;
- info->flags = dpl;
- /* interrupt gates clear IF */
- if (type == 0xe)
- info->flags |= 4;
-
- return 1;
-}
-
-/* Locations of each CPU's IDT */
-static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc);
-
-/* Set an IDT entry. If the entry is part of the current IDT, then
- also update Xen. */
-static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
- u32 low, u32 high)
-{
- unsigned long p = (unsigned long)&dt[entrynum];
- unsigned long start, end;
-
- preempt_disable();
-
- start = __get_cpu_var(idt_desc).address;
- end = start + __get_cpu_var(idt_desc).size + 1;
-
- xen_mc_flush();
-
- write_dt_entry(dt, entrynum, low, high);
-
- if (p >= start && (p + 8) <= end) {
- struct trap_info info[2];
-
- info[1].address = 0;
-
- if (cvt_gate_to_trap(entrynum, low, high, &info[0]))
- if (HYPERVISOR_set_trap_table(info))
- BUG();
- }
-
- preempt_enable();
-}
-
-static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
- struct trap_info *traps)
-{
- unsigned in, out, count;
-
- count = (desc->size+1) / 8;
- BUG_ON(count > 256);
-
- for (in = out = 0; in < count; in++) {
- const u32 *entry = (u32 *)(desc->address + in * 8);
-
- if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out]))
- out++;
- }
- traps[out].address = 0;
-}
-
-void xen_copy_trap_info(struct trap_info *traps)
-{
- const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc);
-
- xen_convert_trap_info(desc, traps);
-}
-
-/* Load a new IDT into Xen. In principle this can be per-CPU, so we
- hold a spinlock to protect the static traps[] array (static because
- it avoids allocation, and saves stack space). */
-static void xen_load_idt(const struct Xgt_desc_struct *desc)
-{
- static DEFINE_SPINLOCK(lock);
- static struct trap_info traps[257];
-
- spin_lock(&lock);
-
- __get_cpu_var(idt_desc) = *desc;
-
- xen_convert_trap_info(desc, traps);
-
- xen_mc_flush();
- if (HYPERVISOR_set_trap_table(traps))
- BUG();
-
- spin_unlock(&lock);
-}
-
-/* Write a GDT descriptor entry. Ignore LDT descriptors, since
- they're handled differently. */
-static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
- u32 low, u32 high)
-{
- preempt_disable();
-
- switch ((high >> 8) & 0xff) {
- case DESCTYPE_LDT:
- case DESCTYPE_TSS:
- /* ignore */
- break;
-
- default: {
- xmaddr_t maddr = virt_to_machine(&dt[entry]);
- u64 desc = (u64)high << 32 | low;
-
- xen_mc_flush();
- if (HYPERVISOR_update_descriptor(maddr.maddr, desc))
- BUG();
- }
-
- }
-
- preempt_enable();
-}
-
-static void xen_load_esp0(struct tss_struct *tss,
- struct thread_struct *thread)
-{
- struct multicall_space mcs = xen_mc_entry(0);
- MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
- xen_mc_issue(PARAVIRT_LAZY_CPU);
-}
-
-static void xen_set_iopl_mask(unsigned mask)
-{
- struct physdev_set_iopl set_iopl;
-
- /* Force the change at ring 0. */
- set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
- HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-}
-
-static void xen_io_delay(void)
-{
-}
-
-#ifdef CONFIG_X86_LOCAL_APIC
-static unsigned long xen_apic_read(unsigned long reg)
-{
- return 0;
-}
-
-static void xen_apic_write(unsigned long reg, unsigned long val)
-{
- /* Warn to see if there's any stray references */
- WARN_ON(1);
-}
-#endif
-
-static void xen_flush_tlb(void)
-{
- struct mmuext_op *op;
- struct multicall_space mcs = xen_mc_entry(sizeof(*op));
-
- op = mcs.args;
- op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
-
-static void xen_flush_tlb_single(unsigned long addr)
-{
- struct mmuext_op *op;
- struct multicall_space mcs = xen_mc_entry(sizeof(*op));
-
- op = mcs.args;
- op->cmd = MMUEXT_INVLPG_LOCAL;
- op->arg1.linear_addr = addr & PAGE_MASK;
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
-
-static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
- unsigned long va)
-{
- struct {
- struct mmuext_op op;
- cpumask_t mask;
- } *args;
- cpumask_t cpumask = *cpus;
- struct multicall_space mcs;
-
- /*
- * A couple of (to be removed) sanity checks:
- *
- * - current CPU must not be in mask
- * - mask must exist :)
- */
- BUG_ON(cpus_empty(cpumask));
- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
- BUG_ON(!mm);
-
- /* If a CPU which we ran on has gone down, OK. */
- cpus_and(cpumask, cpumask, cpu_online_map);
- if (cpus_empty(cpumask))
- return;
-
- mcs = xen_mc_entry(sizeof(*args));
- args = mcs.args;
- args->mask = cpumask;
- args->op.arg2.vcpumask = &args->mask;
-
- if (va == TLB_FLUSH_ALL) {
- args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
- } else {
- args->op.cmd = MMUEXT_INVLPG_MULTI;
- args->op.arg1.linear_addr = va;
- }
-
- MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
-
-static void xen_write_cr2(unsigned long cr2)
-{
- x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
-}
-
-static unsigned long xen_read_cr2(void)
-{
- return x86_read_percpu(xen_vcpu)->arch.cr2;
-}
-
-static unsigned long xen_read_cr2_direct(void)
-{
- return x86_read_percpu(xen_vcpu_info.arch.cr2);
-}
-
-static void xen_write_cr4(unsigned long cr4)
-{
- /* Just ignore cr4 changes; Xen doesn't allow us to do
- anything anyway. */
-}
-
-static unsigned long xen_read_cr3(void)
-{
- return x86_read_percpu(xen_cr3);
-}
-
-static void xen_write_cr3(unsigned long cr3)
-{
- BUG_ON(preemptible());
-
- if (cr3 == x86_read_percpu(xen_cr3)) {
- /* just a simple tlb flush */
- xen_flush_tlb();
- return;
- }
-
- x86_write_percpu(xen_cr3, cr3);
-
-
- {
- struct mmuext_op *op;
- struct multicall_space mcs = xen_mc_entry(sizeof(*op));
- unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
-
- op = mcs.args;
- op->cmd = MMUEXT_NEW_BASEPTR;
- op->arg1.mfn = mfn;
-
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_CPU);
- }
-}
-
-/* Early in boot, while setting up the initial pagetable, assume
- everything is pinned. */
-static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
-{
- BUG_ON(mem_map); /* should only be used early */
- make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
-}
-
-/* This needs to make sure the new pte page is pinned iff its being
- attached to a pinned pagetable. */
-static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
-{
- struct page *page = pfn_to_page(pfn);
-
- if (PagePinned(virt_to_page(mm->pgd))) {
- SetPagePinned(page);
-
- if (!PageHighMem(page))
- make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
- else
- /* make sure there are no stray mappings of
- this page */
- kmap_flush_unused();
- }
-}
-
-/* This should never happen until we're OK to use struct page */
-static void xen_release_pt(u32 pfn)
-{
- struct page *page = pfn_to_page(pfn);
-
- if (PagePinned(page)) {
- if (!PageHighMem(page))
- make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
- }
-}
-
-#ifdef CONFIG_HIGHPTE
-static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
-{
- pgprot_t prot = PAGE_KERNEL;
-
- if (PagePinned(page))
- prot = PAGE_KERNEL_RO;
-
- if (0 && PageHighMem(page))
- printk("mapping highpte %lx type %d prot %s\n",
- page_to_pfn(page), type,
- (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
-
- return kmap_atomic_prot(page, type, prot);
-}
-#endif
-
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
-{
- /* If there's an existing pte, then don't allow _PAGE_RW to be set */
- if (pte_val_ma(*ptep) & _PAGE_PRESENT)
- pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
- pte_val_ma(pte));
-
- return pte;
-}
-
-/* Init-time set_pte while constructing initial pagetables, which
- doesn't allow RO pagetable pages to be remapped RW */
-static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
-{
- pte = mask_rw_pte(ptep, pte);
-
- xen_set_pte(ptep, pte);
-}
-
-static __init void xen_pagetable_setup_start(pgd_t *base)
-{
- pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
-
- /* special set_pte for pagetable initialization */
- paravirt_ops.set_pte = xen_set_pte_init;
-
- init_mm.pgd = base;
- /*
- * copy top-level of Xen-supplied pagetable into place. For
- * !PAE we can use this as-is, but for PAE it is a stand-in
- * while we copy the pmd pages.
- */
- memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
-
- if (PTRS_PER_PMD > 1) {
- int i;
- /*
- * For PAE, need to allocate new pmds, rather than
- * share Xen's, since Xen doesn't like pmd's being
- * shared between address spaces.
- */
- for (i = 0; i < PTRS_PER_PGD; i++) {
- if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
- pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-
- memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
- PAGE_SIZE);
-
- make_lowmem_page_readonly(pmd);
-
- set_pgd(&base[i], __pgd(1 + __pa(pmd)));
- } else
- pgd_clear(&base[i]);
- }
- }
-
- /* make sure zero_page is mapped RO so we can use it in pagetables */
- make_lowmem_page_readonly(empty_zero_page);
- make_lowmem_page_readonly(base);
- /*
- * Switch to new pagetable. This is done before
- * pagetable_init has done anything so that the new pages
- * added to the table can be prepared properly for Xen.
- */
- xen_write_cr3(__pa(base));
-}
-
-static __init void xen_pagetable_setup_done(pgd_t *base)
-{
- /* This will work as long as patching hasn't happened yet
- (which it hasn't) */
- paravirt_ops.alloc_pt = xen_alloc_pt;
- paravirt_ops.set_pte = xen_set_pte;
-
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- /*
- * Create a mapping for the shared info page.
- * Should be set_fixmap(), but shared_info is a machine
- * address with no corresponding pseudo-phys address.
- */
- set_pte_mfn(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
- PFN_DOWN(xen_start_info->shared_info),
- PAGE_KERNEL);
-
- HYPERVISOR_shared_info =
- (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
-
- } else
- HYPERVISOR_shared_info =
- (struct shared_info *)__va(xen_start_info->shared_info);
-
- /* Actually pin the pagetable down, but we can't set PG_pinned
- yet because the page structures don't exist yet. */
- {
- struct mmuext_op op;
-#ifdef CONFIG_X86_PAE
- op.cmd = MMUEXT_PIN_L3_TABLE;
-#else
- op.cmd = MMUEXT_PIN_L3_TABLE;
-#endif
- op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base)));
- if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
- BUG();
- }
-}
-
-/* This is called once we have the cpu_possible_map */
-void __init xen_setup_vcpu_info_placement(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- xen_vcpu_setup(cpu);
-
- /* xen_vcpu_setup managed to place the vcpu_info within the
- percpu area for all cpus, so make use of it */
- if (have_vcpu_info_placement) {
- printk(KERN_INFO "Xen: using vcpu_info placement\n");
-
- paravirt_ops.save_fl = xen_save_fl_direct;
- paravirt_ops.restore_fl = xen_restore_fl_direct;
- paravirt_ops.irq_disable = xen_irq_disable_direct;
- paravirt_ops.irq_enable = xen_irq_enable_direct;
- paravirt_ops.read_cr2 = xen_read_cr2_direct;
- paravirt_ops.iret = xen_iret_direct;
- }
-}
-
-static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
- unsigned long addr, unsigned len)
-{
- char *start, *end, *reloc;
- unsigned ret;
-
- start = end = reloc = NULL;
-
-#define SITE(x) \
- case PARAVIRT_PATCH(x): \
- if (have_vcpu_info_placement) { \
- start = (char *)xen_##x##_direct; \
- end = xen_##x##_direct_end; \
- reloc = xen_##x##_direct_reloc; \
- } \
- goto patch_site
-
- switch (type) {
- SITE(irq_enable);
- SITE(irq_disable);
- SITE(save_fl);
- SITE(restore_fl);
-#undef SITE
-
- patch_site:
- if (start == NULL || (end-start) > len)
- goto default_patch;
-
- ret = paravirt_patch_insns(insnbuf, len, start, end);
-
- /* Note: because reloc is assigned from something that
- appears to be an array, gcc assumes it's non-null,
- but doesn't know its relationship with start and
- end. */
- if (reloc > start && reloc < end) {
- int reloc_off = reloc - start;
- long *relocp = (long *)(insnbuf + reloc_off);
- long delta = start - (char *)addr;
-
- *relocp += delta;
- }
- break;
-
- default_patch:
- default:
- ret = paravirt_patch_default(type, clobbers, insnbuf,
- addr, len);
- break;
- }
-
- return ret;
-}
-
-static const struct paravirt_ops xen_paravirt_ops __initdata = {
- .paravirt_enabled = 1,
- .shared_kernel_pmd = 0,
-
- .name = "Xen",
- .banner = xen_banner,
-
- .patch = xen_patch,
-
- .memory_setup = xen_memory_setup,
- .arch_setup = xen_arch_setup,
- .init_IRQ = xen_init_IRQ,
- .post_allocator_init = xen_mark_init_mm_pinned,
-
- .time_init = xen_time_init,
- .set_wallclock = xen_set_wallclock,
- .get_wallclock = xen_get_wallclock,
- .get_cpu_khz = xen_cpu_khz,
- .sched_clock = xen_sched_clock,
-
- .cpuid = xen_cpuid,
-
- .set_debugreg = xen_set_debugreg,
- .get_debugreg = xen_get_debugreg,
-
- .clts = native_clts,
-
- .read_cr0 = native_read_cr0,
- .write_cr0 = native_write_cr0,
-
- .read_cr2 = xen_read_cr2,
- .write_cr2 = xen_write_cr2,
-
- .read_cr3 = xen_read_cr3,
- .write_cr3 = xen_write_cr3,
-
- .read_cr4 = native_read_cr4,
- .read_cr4_safe = native_read_cr4_safe,
- .write_cr4 = xen_write_cr4,
-
- .save_fl = xen_save_fl,
- .restore_fl = xen_restore_fl,
- .irq_disable = xen_irq_disable,
- .irq_enable = xen_irq_enable,
- .safe_halt = xen_safe_halt,
- .halt = xen_halt,
- .wbinvd = native_wbinvd,
-
- .read_msr = native_read_msr_safe,
- .write_msr = native_write_msr_safe,
- .read_tsc = native_read_tsc,
- .read_pmc = native_read_pmc,
-
- .iret = (void *)&hypercall_page[__HYPERVISOR_iret],
- .irq_enable_sysexit = NULL, /* never called */
-
- .load_tr_desc = paravirt_nop,
- .set_ldt = xen_set_ldt,
- .load_gdt = xen_load_gdt,
- .load_idt = xen_load_idt,
- .load_tls = xen_load_tls,
-
- .store_gdt = native_store_gdt,
- .store_idt = native_store_idt,
- .store_tr = xen_store_tr,
-
- .write_ldt_entry = xen_write_ldt_entry,
- .write_gdt_entry = xen_write_gdt_entry,
- .write_idt_entry = xen_write_idt_entry,
- .load_esp0 = xen_load_esp0,
-
- .set_iopl_mask = xen_set_iopl_mask,
- .io_delay = xen_io_delay,
-
-#ifdef CONFIG_X86_LOCAL_APIC
- .apic_write = xen_apic_write,
- .apic_write_atomic = xen_apic_write,
- .apic_read = xen_apic_read,
- .setup_boot_clock = paravirt_nop,
- .setup_secondary_clock = paravirt_nop,
- .startup_ipi_hook = paravirt_nop,
-#endif
-
- .flush_tlb_user = xen_flush_tlb,
- .flush_tlb_kernel = xen_flush_tlb,
- .flush_tlb_single = xen_flush_tlb_single,
- .flush_tlb_others = xen_flush_tlb_others,
-
- .pte_update = paravirt_nop,
- .pte_update_defer = paravirt_nop,
-
- .pagetable_setup_start = xen_pagetable_setup_start,
- .pagetable_setup_done = xen_pagetable_setup_done,
-
- .alloc_pt = xen_alloc_pt_init,
- .release_pt = xen_release_pt,
- .alloc_pd = paravirt_nop,
- .alloc_pd_clone = paravirt_nop,
- .release_pd = paravirt_nop,
-
-#ifdef CONFIG_HIGHPTE
- .kmap_atomic_pte = xen_kmap_atomic_pte,
-#endif
-
- .set_pte = NULL, /* see xen_pagetable_setup_* */
- .set_pte_at = xen_set_pte_at,
- .set_pmd = xen_set_pmd,
-
- .pte_val = xen_pte_val,
- .pgd_val = xen_pgd_val,
-
- .make_pte = xen_make_pte,
- .make_pgd = xen_make_pgd,
-
-#ifdef CONFIG_X86_PAE
- .set_pte_atomic = xen_set_pte_atomic,
- .set_pte_present = xen_set_pte_at,
- .set_pud = xen_set_pud,
- .pte_clear = xen_pte_clear,
- .pmd_clear = xen_pmd_clear,
-
- .make_pmd = xen_make_pmd,
- .pmd_val = xen_pmd_val,
-#endif /* PAE */
-
- .activate_mm = xen_activate_mm,
- .dup_mmap = xen_dup_mmap,
- .exit_mmap = xen_exit_mmap,
-
- .set_lazy_mode = xen_set_lazy_mode,
-};
-
-#ifdef CONFIG_SMP
-static const struct smp_ops xen_smp_ops __initdata = {
- .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
- .smp_prepare_cpus = xen_smp_prepare_cpus,
- .cpu_up = xen_cpu_up,
- .smp_cpus_done = xen_smp_cpus_done,
-
- .smp_send_stop = xen_smp_send_stop,
- .smp_send_reschedule = xen_smp_send_reschedule,
- .smp_call_function_mask = xen_smp_call_function_mask,
-};
-#endif /* CONFIG_SMP */
-
-static void xen_reboot(int reason)
-{
-#ifdef CONFIG_SMP
- smp_send_stop();
-#endif
-
- if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason))
- BUG();
-}
-
-static void xen_restart(char *msg)
-{
- xen_reboot(SHUTDOWN_reboot);
-}
-
-static void xen_emergency_restart(void)
-{
- xen_reboot(SHUTDOWN_reboot);
-}
-
-static void xen_machine_halt(void)
-{
- xen_reboot(SHUTDOWN_poweroff);
-}
-
-static void xen_crash_shutdown(struct pt_regs *regs)
-{
- xen_reboot(SHUTDOWN_crash);
-}
-
-static const struct machine_ops __initdata xen_machine_ops = {
- .restart = xen_restart,
- .halt = xen_machine_halt,
- .power_off = xen_machine_halt,
- .shutdown = xen_machine_halt,
- .crash_shutdown = xen_crash_shutdown,
- .emergency_restart = xen_emergency_restart,
-};
-
-
-/* First C function to be called on Xen boot */
-asmlinkage void __init xen_start_kernel(void)
-{
- pgd_t *pgd;
-
- if (!xen_start_info)
- return;
-
- BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0);
-
- /* Install Xen paravirt ops */
- paravirt_ops = xen_paravirt_ops;
- machine_ops = xen_machine_ops;
-
-#ifdef CONFIG_SMP
- smp_ops = xen_smp_ops;
-#endif
-
- xen_setup_features();
-
- /* Get mfn list */
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
-
- pgd = (pgd_t *)xen_start_info->pt_base;
-
- init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
-
- init_mm.pgd = pgd; /* use the Xen pagetables to start */
-
- /* keep using Xen gdt for now; no urgent need to change it */
-
- x86_write_percpu(xen_cr3, __pa(pgd));
-
-#ifdef CONFIG_SMP
- /* Don't do the full vcpu_info placement stuff until we have a
- possible map. */
- per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
-#else
- /* May as well do it now, since there's no good time to call
- it later on UP. */
- xen_setup_vcpu_info_placement();
-#endif
-
- paravirt_ops.kernel_rpl = 1;
- if (xen_feature(XENFEAT_supervisor_mode_kernel))
- paravirt_ops.kernel_rpl = 0;
-
- /* set the limit of our address space */
- reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE);
-
- /* set up basic CPUID stuff */
- cpu_detect(&new_cpu_data);
- new_cpu_data.hard_math = 1;
- new_cpu_data.x86_capability[0] = cpuid_edx(1);
-
- /* Poke various useful things into boot_params */
- LOADER_TYPE = (9 << 4) | 0;
- INITRD_START = xen_start_info->mod_start ? __pa(xen_start_info->mod_start) : 0;
- INITRD_SIZE = xen_start_info->mod_len;
-
- /* Start the world */
- start_kernel();
-}
diff --git a/arch/i386/xen/events.c b/arch/i386/xen/events.c
deleted file mode 100644
index da1b173547a..00000000000
--- a/arch/i386/xen/events.c
+++ /dev/null
@@ -1,591 +0,0 @@
-/*
- * Xen event channels
- *
- * Xen models interrupts with abstract event channels. Because each
- * domain gets 1024 event channels, but NR_IRQ is not that large, we
- * must dynamically map irqs<->event channels. The event channels
- * interface with the rest of the kernel by defining a xen interrupt
- * chip. When an event is recieved, it is mapped to an irq and sent
- * through the normal interrupt processing path.
- *
- * There are four kinds of events which can be mapped to an event
- * channel:
- *
- * 1. Inter-domain notifications. This includes all the virtual
- * device events, since they're driven by front-ends in another domain
- * (typically dom0).
- * 2. VIRQs, typically used for timers. These are per-cpu events.
- * 3. IPIs.
- * 4. Hardware interrupts. Not supported at present.
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-
-#include <linux/linkage.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <asm/ptrace.h>
-#include <asm/irq.h>
-#include <asm/sync_bitops.h>
-#include <asm/xen/hypercall.h>
-#include <asm/xen/hypervisor.h>
-
-#include <xen/events.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/event_channel.h>
-
-#include "xen-ops.h"
-
-/*
- * This lock protects updates to the following mapping and reference-count
- * arrays. The lock does not need to be acquired to read the mapping tables.
- */
-static DEFINE_SPINLOCK(irq_mapping_update_lock);
-
-/* IRQ <-> VIRQ mapping. */
-static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
-
-/* IRQ <-> IPI mapping */
-static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
-
-/* Packed IRQ information: binding type, sub-type index, and event channel. */
-struct packed_irq
-{
- unsigned short evtchn;
- unsigned char index;
- unsigned char type;
-};
-
-static struct packed_irq irq_info[NR_IRQS];
-
-/* Binding types. */
-enum {
- IRQT_UNBOUND,
- IRQT_PIRQ,
- IRQT_VIRQ,
- IRQT_IPI,
- IRQT_EVTCHN
-};
-
-/* Convenient shorthand for packed representation of an unbound IRQ. */
-#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
-
-static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
- [0 ... NR_EVENT_CHANNELS-1] = -1
-};
-static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
-static u8 cpu_evtchn[NR_EVENT_CHANNELS];
-
-/* Reference counts for bindings to IRQs. */
-static int irq_bindcount[NR_IRQS];
-
-/* Xen will never allocate port zero for any purpose. */
-#define VALID_EVTCHN(chn) ((chn) != 0)
-
-/*
- * Force a proper event-channel callback from Xen after clearing the
- * callback mask. We do this in a very simple manner, by making a call
- * down into Xen. The pending flag will be checked by Xen on return.
- */
-void force_evtchn_callback(void)
-{
- (void)HYPERVISOR_xen_version(0, NULL);
-}
-EXPORT_SYMBOL_GPL(force_evtchn_callback);
-
-static struct irq_chip xen_dynamic_chip;
-
-/* Constructor for packed IRQ information. */
-static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn)
-{
- return (struct packed_irq) { evtchn, index, type };
-}
-
-/*
- * Accessors for packed IRQ information.
- */
-static inline unsigned int evtchn_from_irq(int irq)
-{
- return irq_info[irq].evtchn;
-}
-
-static inline unsigned int index_from_irq(int irq)
-{
- return irq_info[irq].index;
-}
-
-static inline unsigned int type_from_irq(int irq)
-{
- return irq_info[irq].type;
-}
-
-static inline unsigned long active_evtchns(unsigned int cpu,
- struct shared_info *sh,
- unsigned int idx)
-{
- return (sh->evtchn_pending[idx] &
- cpu_evtchn_mask[cpu][idx] &
- ~sh->evtchn_mask[idx]);
-}
-
-static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
-{
- int irq = evtchn_to_irq[chn];
-
- BUG_ON(irq == -1);
-#ifdef CONFIG_SMP
- irq_desc[irq].affinity = cpumask_of_cpu(cpu);
-#endif
-
- __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
- __set_bit(chn, cpu_evtchn_mask[cpu]);
-
- cpu_evtchn[chn] = cpu;
-}
-
-static void init_evtchn_cpu_bindings(void)
-{
-#ifdef CONFIG_SMP
- int i;
- /* By default all event channels notify CPU#0. */
- for (i = 0; i < NR_IRQS; i++)
- irq_desc[i].affinity = cpumask_of_cpu(0);
-#endif
-
- memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
- memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
-}
-
-static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
-{
- return cpu_evtchn[evtchn];
-}
-
-static inline void clear_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_clear_bit(port, &s->evtchn_pending[0]);
-}
-
-static inline void set_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_set_bit(port, &s->evtchn_pending[0]);
-}
-
-
-/**
- * notify_remote_via_irq - send event to remote end of event channel via irq
- * @irq: irq of event channel to send event to
- *
- * Unlike notify_remote_via_evtchn(), this is safe to use across
- * save/restore. Notifications on a broken connection are silently
- * dropped.
- */
-void notify_remote_via_irq(int irq)
-{
- int evtchn = evtchn_from_irq(irq);
-
- if (VALID_EVTCHN(evtchn))
- notify_remote_via_evtchn(evtchn);
-}
-EXPORT_SYMBOL_GPL(notify_remote_via_irq);
-
-static void mask_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_set_bit(port, &s->evtchn_mask[0]);
-}
-
-static void unmask_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- unsigned int cpu = get_cpu();
-
- BUG_ON(!irqs_disabled());
-
- /* Slow path (hypercall) if this is a non-local port. */
- if (unlikely(cpu != cpu_from_evtchn(port))) {
- struct evtchn_unmask unmask = { .port = port };
- (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
- } else {
- struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
-
- sync_clear_bit(port, &s->evtchn_mask[0]);
-
- /*
- * The following is basically the equivalent of
- * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
- * the interrupt edge' if the channel is masked.
- */
- if (sync_test_bit(port, &s->evtchn_pending[0]) &&
- !sync_test_and_set_bit(port / BITS_PER_LONG,
- &vcpu_info->evtchn_pending_sel))
- vcpu_info->evtchn_upcall_pending = 1;
- }
-
- put_cpu();
-}
-
-static int find_unbound_irq(void)
-{
- int irq;
-
- /* Only allocate from dynirq range */
- for (irq = 0; irq < NR_IRQS; irq++)
- if (irq_bindcount[irq] == 0)
- break;
-
- if (irq == NR_IRQS)
- panic("No available IRQ to bind to: increase NR_IRQS!\n");
-
- return irq;
-}
-
-int bind_evtchn_to_irq(unsigned int evtchn)
-{
- int irq;
-
- spin_lock(&irq_mapping_update_lock);
-
- irq = evtchn_to_irq[evtchn];
-
- if (irq == -1) {
- irq = find_unbound_irq();
-
- dynamic_irq_init(irq);
- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
- handle_level_irq, "event");
-
- evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
- }
-
- irq_bindcount[irq]++;
-
- spin_unlock(&irq_mapping_update_lock);
-
- return irq;
-}
-EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
-
-static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
-{
- struct evtchn_bind_ipi bind_ipi;
- int evtchn, irq;
-
- spin_lock(&irq_mapping_update_lock);
-
- irq = per_cpu(ipi_to_irq, cpu)[ipi];
- if (irq == -1) {
- irq = find_unbound_irq();
- if (irq < 0)
- goto out;
-
- dynamic_irq_init(irq);
- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
- handle_level_irq, "ipi");
-
- bind_ipi.vcpu = cpu;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
- &bind_ipi) != 0)
- BUG();
- evtchn = bind_ipi.port;
-
- evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
-
- per_cpu(ipi_to_irq, cpu)[ipi] = irq;
-
- bind_evtchn_to_cpu(evtchn, cpu);
- }
-
- irq_bindcount[irq]++;
-
- out:
- spin_unlock(&irq_mapping_update_lock);
- return irq;
-}
-
-
-static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
-{
- struct evtchn_bind_virq bind_virq;
- int evtchn, irq;
-
- spin_lock(&irq_mapping_update_lock);
-
- irq = per_cpu(virq_to_irq, cpu)[virq];
-
- if (irq == -1) {
- bind_virq.virq = virq;
- bind_virq.vcpu = cpu;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
- &bind_virq) != 0)
- BUG();
- evtchn = bind_virq.port;
-
- irq = find_unbound_irq();
-
- dynamic_irq_init(irq);
- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
- handle_level_irq, "virq");
-
- evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
-
- per_cpu(virq_to_irq, cpu)[virq] = irq;
-
- bind_evtchn_to_cpu(evtchn, cpu);
- }
-
- irq_bindcount[irq]++;
-
- spin_unlock(&irq_mapping_update_lock);
-
- return irq;
-}
-
-static void unbind_from_irq(unsigned int irq)
-{
- struct evtchn_close close;
- int evtchn = evtchn_from_irq(irq);
-
- spin_lock(&irq_mapping_update_lock);
-
- if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) {
- close.port = evtchn;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
-
- switch (type_from_irq(irq)) {
- case IRQT_VIRQ:
- per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
- [index_from_irq(irq)] = -1;
- break;
- default:
- break;
- }
-
- /* Closed ports are implicitly re-bound to VCPU0. */
- bind_evtchn_to_cpu(evtchn, 0);
-
- evtchn_to_irq[evtchn] = -1;
- irq_info[irq] = IRQ_UNBOUND;
-
- dynamic_irq_init(irq);
- }
-
- spin_unlock(&irq_mapping_update_lock);
-}
-
-int bind_evtchn_to_irqhandler(unsigned int evtchn,
- irqreturn_t (*handler)(int, void *),
- unsigned long irqflags,
- const char *devname, void *dev_id)
-{
- unsigned int irq;
- int retval;
-
- irq = bind_evtchn_to_irq(evtchn);
- retval = request_irq(irq, handler, irqflags, devname, dev_id);
- if (retval != 0) {
- unbind_from_irq(irq);
- return retval;
- }
-
- return irq;
-}
-EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
-
-int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
- irqreturn_t (*handler)(int, void *),
- unsigned long irqflags, const char *devname, void *dev_id)
-{
- unsigned int irq;
- int retval;
-
- irq = bind_virq_to_irq(virq, cpu);
- retval = request_irq(irq, handler, irqflags, devname, dev_id);
- if (retval != 0) {
- unbind_from_irq(irq);
- return retval;
- }
-
- return irq;
-}
-EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
-
-int bind_ipi_to_irqhandler(enum ipi_vector ipi,
- unsigned int cpu,
- irq_handler_t handler,
- unsigned long irqflags,
- const char *devname,
- void *dev_id)
-{
- int irq, retval;
-
- irq = bind_ipi_to_irq(ipi, cpu);
- if (irq < 0)
- return irq;
-
- retval = request_irq(irq, handler, irqflags, devname, dev_id);
- if (retval != 0) {
- unbind_from_irq(irq);
- return retval;
- }
-
- return irq;
-}
-
-void unbind_from_irqhandler(unsigned int irq, void *dev_id)
-{
- free_irq(irq, dev_id);
- unbind_from_irq(irq);
-}
-EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
-
-void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
-{
- int irq = per_cpu(ipi_to_irq, cpu)[vector];
- BUG_ON(irq < 0);
- notify_remote_via_irq(irq);
-}
-
-
-/*
- * Search the CPUs pending events bitmasks. For each one found, map
- * the event number to an irq, and feed it into do_IRQ() for
- * handling.
- *
- * Xen uses a two-level bitmap to speed searching. The first level is
- * a bitset of words which contain pending event bits. The second
- * level is a bitset of pending events themselves.
- */
-fastcall void xen_evtchn_do_upcall(struct pt_regs *regs)
-{
- int cpu = get_cpu();
- struct shared_info *s = HYPERVISOR_shared_info;
- struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
- unsigned long pending_words;
-
- vcpu_info->evtchn_upcall_pending = 0;
-
- /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
- pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
- while (pending_words != 0) {
- unsigned long pending_bits;
- int word_idx = __ffs(pending_words);
- pending_words &= ~(1UL << word_idx);
-
- while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
- int bit_idx = __ffs(pending_bits);
- int port = (word_idx * BITS_PER_LONG) + bit_idx;
- int irq = evtchn_to_irq[port];
-
- if (irq != -1) {
- regs->orig_eax = ~irq;
- do_IRQ(regs);
- }
- }
- }
-
- put_cpu();
-}
-
-/* Rebind an evtchn so that it gets delivered to a specific cpu */
-static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
-{
- struct evtchn_bind_vcpu bind_vcpu;
- int evtchn = evtchn_from_irq(irq);
-
- if (!VALID_EVTCHN(evtchn))
- return;
-
- /* Send future instances of this interrupt to other vcpu. */
- bind_vcpu.port = evtchn;
- bind_vcpu.vcpu = tcpu;
-
- /*
- * If this fails, it usually just indicates that we're dealing with a
- * virq or IPI channel, which don't actually need to be rebound. Ignore
- * it, but don't do the xenlinux-level rebind in that case.
- */
- if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
- bind_evtchn_to_cpu(evtchn, tcpu);
-}
-
-
-static void set_affinity_irq(unsigned irq, cpumask_t dest)
-{
- unsigned tcpu = first_cpu(dest);
- rebind_irq_to_cpu(irq, tcpu);
-}
-
-static void enable_dynirq(unsigned int irq)
-{
- int evtchn = evtchn_from_irq(irq);
-
- if (VALID_EVTCHN(evtchn))
- unmask_evtchn(evtchn);
-}
-
-static void disable_dynirq(unsigned int irq)
-{
- int evtchn = evtchn_from_irq(irq);
-
- if (VALID_EVTCHN(evtchn))
- mask_evtchn(evtchn);
-}
-
-static void ack_dynirq(unsigned int irq)
-{
- int evtchn = evtchn_from_irq(irq);
-
- move_native_irq(irq);
-
- if (VALID_EVTCHN(evtchn))
- clear_evtchn(evtchn);
-}
-
-static int retrigger_dynirq(unsigned int irq)
-{
- int evtchn = evtchn_from_irq(irq);
- int ret = 0;
-
- if (VALID_EVTCHN(evtchn)) {
- set_evtchn(evtchn);
- ret = 1;
- }
-
- return ret;
-}
-
-static struct irq_chip xen_dynamic_chip __read_mostly = {
- .name = "xen-dyn",
- .mask = disable_dynirq,
- .unmask = enable_dynirq,
- .ack = ack_dynirq,
- .set_affinity = set_affinity_irq,
- .retrigger = retrigger_dynirq,
-};
-
-void __init xen_init_IRQ(void)
-{
- int i;
-
- init_evtchn_cpu_bindings();
-
- /* No event channels are 'live' right now. */
- for (i = 0; i < NR_EVENT_CHANNELS; i++)
- mask_evtchn(i);
-
- /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
- for (i = 0; i < NR_IRQS; i++)
- irq_bindcount[i] = 0;
-
- irq_ctx_init(smp_processor_id());
-}
diff --git a/arch/i386/xen/features.c b/arch/i386/xen/features.c
deleted file mode 100644
index 0707714e40d..00000000000
--- a/arch/i386/xen/features.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/******************************************************************************
- * features.c
- *
- * Xen feature flags.
- *
- * Copyright (c) 2006, Ian Campbell, XenSource Inc.
- */
-#include <linux/types.h>
-#include <linux/cache.h>
-#include <linux/module.h>
-#include <asm/xen/hypervisor.h>
-#include <xen/features.h>
-
-u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
-EXPORT_SYMBOL_GPL(xen_features);
-
-void xen_setup_features(void)
-{
- struct xen_feature_info fi;
- int i, j;
-
- for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
- fi.submap_idx = i;
- if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
- break;
- for (j = 0; j < 32; j++)
- xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
- }
-}
diff --git a/arch/i386/xen/manage.c b/arch/i386/xen/manage.c
deleted file mode 100644
index aa7af9e6abc..00000000000
--- a/arch/i386/xen/manage.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Handle extern requests for shutdown, reboot and sysrq
- */
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/reboot.h>
-#include <linux/sysrq.h>
-
-#include <xen/xenbus.h>
-
-#define SHUTDOWN_INVALID -1
-#define SHUTDOWN_POWEROFF 0
-#define SHUTDOWN_SUSPEND 2
-/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
- * report a crash, not be instructed to crash!
- * HALT is the same as POWEROFF, as far as we're concerned. The tools use
- * the distinction when we return the reason code to them.
- */
-#define SHUTDOWN_HALT 4
-
-/* Ignore multiple shutdown requests. */
-static int shutting_down = SHUTDOWN_INVALID;
-
-static void shutdown_handler(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
-{
- char *str;
- struct xenbus_transaction xbt;
- int err;
-
- if (shutting_down != SHUTDOWN_INVALID)
- return;
-
- again:
- err = xenbus_transaction_start(&xbt);
- if (err)
- return;
-
- str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
- /* Ignore read errors and empty reads. */
- if (XENBUS_IS_ERR_READ(str)) {
- xenbus_transaction_end(xbt, 1);
- return;
- }
-
- xenbus_write(xbt, "control", "shutdown", "");
-
- err = xenbus_transaction_end(xbt, 0);
- if (err == -EAGAIN) {
- kfree(str);
- goto again;
- }
-
- if (strcmp(str, "poweroff") == 0 ||
- strcmp(str, "halt") == 0)
- orderly_poweroff(false);
- else if (strcmp(str, "reboot") == 0)
- ctrl_alt_del();
- else {
- printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
- shutting_down = SHUTDOWN_INVALID;
- }
-
- kfree(str);
-}
-
-static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
- unsigned int len)
-{
- char sysrq_key = '\0';
- struct xenbus_transaction xbt;
- int err;
-
- again:
- err = xenbus_transaction_start(&xbt);
- if (err)
- return;
- if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
- printk(KERN_ERR "Unable to read sysrq code in "
- "control/sysrq\n");
- xenbus_transaction_end(xbt, 1);
- return;
- }
-
- if (sysrq_key != '\0')
- xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
-
- err = xenbus_transaction_end(xbt, 0);
- if (err == -EAGAIN)
- goto again;
-
- if (sysrq_key != '\0')
- handle_sysrq(sysrq_key, NULL);
-}
-
-static struct xenbus_watch shutdown_watch = {
- .node = "control/shutdown",
- .callback = shutdown_handler
-};
-
-static struct xenbus_watch sysrq_watch = {
- .node = "control/sysrq",
- .callback = sysrq_handler
-};
-
-static int setup_shutdown_watcher(void)
-{
- int err;
-
- err = register_xenbus_watch(&shutdown_watch);
- if (err) {
- printk(KERN_ERR "Failed to set shutdown watcher\n");
- return err;
- }
-
- err = register_xenbus_watch(&sysrq_watch);
- if (err) {
- printk(KERN_ERR "Failed to set sysrq watcher\n");
- return err;
- }
-
- return 0;
-}
-
-static int shutdown_event(struct notifier_block *notifier,
- unsigned long event,
- void *data)
-{
- setup_shutdown_watcher();
- return NOTIFY_DONE;
-}
-
-static int __init setup_shutdown_event(void)
-{
- static struct notifier_block xenstore_notifier = {
- .notifier_call = shutdown_event
- };
- register_xenstore_notifier(&xenstore_notifier);
-
- return 0;
-}
-
-subsys_initcall(setup_shutdown_event);
diff --git a/arch/i386/xen/mmu.c b/arch/i386/xen/mmu.c
deleted file mode 100644
index 874db0cd1d2..00000000000
--- a/arch/i386/xen/mmu.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- * Xen mmu operations
- *
- * This file contains the various mmu fetch and update operations.
- * The most important job they must perform is the mapping between the
- * domain's pfn and the overall machine mfns.
- *
- * Xen allows guests to directly update the pagetable, in a controlled
- * fashion. In other words, the guest modifies the same pagetable
- * that the CPU actually uses, which eliminates the overhead of having
- * a separate shadow pagetable.
- *
- * In order to allow this, it falls on the guest domain to map its
- * notion of a "physical" pfn - which is just a domain-local linear
- * address - into a real "machine address" which the CPU's MMU can
- * use.
- *
- * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
- * inserted directly into the pagetable. When creating a new
- * pte/pmd/pgd, it converts the passed pfn into an mfn. Conversely,
- * when reading the content back with __(pgd|pmd|pte)_val, it converts
- * the mfn back into a pfn.
- *
- * The other constraint is that all pages which make up a pagetable
- * must be mapped read-only in the guest. This prevents uncontrolled
- * guest updates to the pagetable. Xen strictly enforces this, and
- * will disallow any pagetable update which will end up mapping a
- * pagetable page RW, and will disallow using any writable page as a
- * pagetable.
- *
- * Naively, when loading %cr3 with the base of a new pagetable, Xen
- * would need to validate the whole pagetable before going on.
- * Naturally, this is quite slow. The solution is to "pin" a
- * pagetable, which enforces all the constraints on the pagetable even
- * when it is not actively in use. This menas that Xen can be assured
- * that it is still valid when you do load it into %cr3, and doesn't
- * need to revalidate it.
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-#include <linux/sched.h>
-#include <linux/highmem.h>
-#include <linux/bug.h>
-#include <linux/sched.h>
-
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <asm/paravirt.h>
-
-#include <asm/xen/hypercall.h>
-#include <asm/xen/hypervisor.h>
-
-#include <xen/page.h>
-#include <xen/interface/xen.h>
-
-#include "multicalls.h"
-#include "mmu.h"
-
-xmaddr_t arbitrary_virt_to_machine(unsigned long address)
-{
- pte_t *pte = lookup_address(address);
- unsigned offset = address & PAGE_MASK;
-
- BUG_ON(pte == NULL);
-
- return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
-}
-
-void make_lowmem_page_readonly(void *vaddr)
-{
- pte_t *pte, ptev;
- unsigned long address = (unsigned long)vaddr;
-
- pte = lookup_address(address);
- BUG_ON(pte == NULL);
-
- ptev = pte_wrprotect(*pte);
-
- if (HYPERVISOR_update_va_mapping(address, ptev, 0))
- BUG();
-}
-
-void make_lowmem_page_readwrite(void *vaddr)
-{
- pte_t *pte, ptev;
- unsigned long address = (unsigned long)vaddr;
-
- pte = lookup_address(address);
- BUG_ON(pte == NULL);
-
- ptev = pte_mkwrite(*pte);
-
- if (HYPERVISOR_update_va_mapping(address, ptev, 0))
- BUG();
-}
-
-
-void xen_set_pmd(pmd_t *ptr, pmd_t val)
-{
- struct multicall_space mcs;
- struct mmu_update *u;
-
- preempt_disable();
-
- mcs = xen_mc_entry(sizeof(*u));
- u = mcs.args;
- u->ptr = virt_to_machine(ptr).maddr;
- u->val = pmd_val_ma(val);
- MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-
- preempt_enable();
-}
-
-/*
- * Associate a virtual page frame with a given physical page frame
- * and protection flags for that frame.
- */
-void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = swapper_pg_dir + pgd_index(vaddr);
- if (pgd_none(*pgd)) {
- BUG();
- return;
- }
- pud = pud_offset(pgd, vaddr);
- if (pud_none(*pud)) {
- BUG();
- return;
- }
- pmd = pmd_offset(pud, vaddr);
- if (pmd_none(*pmd)) {
- BUG();
- return;
- }
- pte = pte_offset_kernel(pmd, vaddr);
- /* <mfn,flags> stored as-is, to permit clearing entries */
- xen_set_pte(pte, mfn_pte(mfn, flags));
-
- /*
- * It's enough to flush this one mapping.
- * (PGE mappings get flushed as well)
- */
- __flush_tlb_one(vaddr);
-}
-
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
-{
- if (mm == current->mm || mm == &init_mm) {
- if (xen_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
- struct multicall_space mcs;
- mcs = xen_mc_entry(0);
-
- MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
- xen_mc_issue(PARAVIRT_LAZY_MMU);
- return;
- } else
- if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
- return;
- }
- xen_set_pte(ptep, pteval);
-}
-
-#ifdef CONFIG_X86_PAE
-void xen_set_pud(pud_t *ptr, pud_t val)
-{
- struct multicall_space mcs;
- struct mmu_update *u;
-
- preempt_disable();
-
- mcs = xen_mc_entry(sizeof(*u));
- u = mcs.args;
- u->ptr = virt_to_machine(ptr).maddr;
- u->val = pud_val_ma(val);
- MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-
- preempt_enable();
-}
-
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
- ptep->pte_high = pte.pte_high;
- smp_wmb();
- ptep->pte_low = pte.pte_low;
-}
-
-void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
-{
- set_64bit((u64 *)ptep, pte_val_ma(pte));
-}
-
-void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- ptep->pte_low = 0;
- smp_wmb(); /* make sure low gets written first */
- ptep->pte_high = 0;
-}
-
-void xen_pmd_clear(pmd_t *pmdp)
-{
- xen_set_pmd(pmdp, __pmd(0));
-}
-
-unsigned long long xen_pte_val(pte_t pte)
-{
- unsigned long long ret = 0;
-
- if (pte.pte_low) {
- ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
- ret = machine_to_phys(XMADDR(ret)).paddr | 1;
- }
-
- return ret;
-}
-
-unsigned long long xen_pmd_val(pmd_t pmd)
-{
- unsigned long long ret = pmd.pmd;
- if (ret)
- ret = machine_to_phys(XMADDR(ret)).paddr | 1;
- return ret;
-}
-
-unsigned long long xen_pgd_val(pgd_t pgd)
-{
- unsigned long long ret = pgd.pgd;
- if (ret)
- ret = machine_to_phys(XMADDR(ret)).paddr | 1;
- return ret;
-}
-
-pte_t xen_make_pte(unsigned long long pte)
-{
- if (pte & 1)
- pte = phys_to_machine(XPADDR(pte)).maddr;
-
- return (pte_t){ pte, pte >> 32 };
-}
-
-pmd_t xen_make_pmd(unsigned long long pmd)
-{
- if (pmd & 1)
- pmd = phys_to_machine(XPADDR(pmd)).maddr;
-
- return (pmd_t){ pmd };
-}
-
-pgd_t xen_make_pgd(unsigned long long pgd)
-{
- if (pgd & _PAGE_PRESENT)
- pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
- return (pgd_t){ pgd };
-}
-#else /* !PAE */
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
- *ptep = pte;
-}
-
-unsigned long xen_pte_val(pte_t pte)
-{
- unsigned long ret = pte.pte_low;
-
- if (ret & _PAGE_PRESENT)
- ret = machine_to_phys(XMADDR(ret)).paddr;
-
- return ret;
-}
-
-unsigned long xen_pgd_val(pgd_t pgd)
-{
- unsigned long ret = pgd.pgd;
- if (ret)
- ret = machine_to_phys(XMADDR(ret)).paddr | 1;
- return ret;
-}
-
-pte_t xen_make_pte(unsigned long pte)
-{
- if (pte & _PAGE_PRESENT)
- pte = phys_to_machine(XPADDR(pte)).maddr;
-
- return (pte_t){ pte };
-}
-
-pgd_t xen_make_pgd(unsigned long pgd)
-{
- if (pgd & _PAGE_PRESENT)
- pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
- return (pgd_t){ pgd };
-}
-#endif /* CONFIG_X86_PAE */
-
-
-
-/*
- (Yet another) pagetable walker. This one is intended for pinning a
- pagetable. This means that it walks a pagetable and calls the
- callback function on each page it finds making up the page table,
- at every level. It walks the entire pagetable, but it only bothers
- pinning pte pages which are below pte_limit. In the normal case
- this will be TASK_SIZE, but at boot we need to pin up to
- FIXADDR_TOP. But the important bit is that we don't pin beyond
- there, because then we start getting into Xen's ptes.
-*/
-static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
- unsigned long limit)
-{
- pgd_t *pgd = pgd_base;
- int flush = 0;
- unsigned long addr = 0;
- unsigned long pgd_next;
-
- BUG_ON(limit > FIXADDR_TOP);
-
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return 0;
-
- for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
- pud_t *pud;
- unsigned long pud_limit, pud_next;
-
- pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);
-
- if (!pgd_val(*pgd))
- continue;
-
- pud = pud_offset(pgd, 0);
-
- if (PTRS_PER_PUD > 1) /* not folded */
- flush |= (*func)(virt_to_page(pud), 0);
-
- for (; addr != pud_limit; pud++, addr = pud_next) {
- pmd_t *pmd;
- unsigned long pmd_limit;
-
- pud_next = pud_addr_end(addr, pud_limit);
-
- if (pud_next < limit)
- pmd_limit = pud_next;
- else
- pmd_limit = limit;
-
- if (pud_none(*pud))
- continue;
-
- pmd = pmd_offset(pud, 0);
-
- if (PTRS_PER_PMD > 1) /* not folded */
- flush |= (*func)(virt_to_page(pmd), 0);
-
- for (; addr != pmd_limit; pmd++) {
- addr += (PAGE_SIZE * PTRS_PER_PTE);
- if ((pmd_limit-1) < (addr-1)) {
- addr = pmd_limit;
- break;
- }
-
- if (pmd_none(*pmd))
- continue;
-
- flush |= (*func)(pmd_page(*pmd), 0);
- }
- }
- }
-
- flush |= (*func)(virt_to_page(pgd_base), UVMF_TLB_FLUSH);
-
- return flush;
-}
-
-static int pin_page(struct page *page, unsigned flags)
-{
- unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags);
- int flush;
-
- if (pgfl)
- flush = 0; /* already pinned */
- else if (PageHighMem(page))
- /* kmaps need flushing if we found an unpinned
- highpage */
- flush = 1;
- else {
- void *pt = lowmem_page_address(page);
- unsigned long pfn = page_to_pfn(page);
- struct multicall_space mcs = __xen_mc_entry(0);
-
- flush = 0;
-
- MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
- pfn_pte(pfn, PAGE_KERNEL_RO),
- flags);
- }
-
- return flush;
-}
-
-/* This is called just after a mm has been created, but it has not
- been used yet. We need to make sure that its pagetable is all
- read-only, and can be pinned. */
-void xen_pgd_pin(pgd_t *pgd)
-{
- struct multicall_space mcs;
- struct mmuext_op *op;
-
- xen_mc_batch();
-
- if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
- /* re-enable interrupts for kmap_flush_unused */
- xen_mc_issue(0);
- kmap_flush_unused();
- xen_mc_batch();
- }
-
- mcs = __xen_mc_entry(sizeof(*op));
- op = mcs.args;
-
-#ifdef CONFIG_X86_PAE
- op->cmd = MMUEXT_PIN_L3_TABLE;
-#else
- op->cmd = MMUEXT_PIN_L2_TABLE;
-#endif
- op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
- xen_mc_issue(0);
-}
-
-/* The init_mm pagetable is really pinned as soon as its created, but
- that's before we have page structures to store the bits. So do all
- the book-keeping now. */
-static __init int mark_pinned(struct page *page, unsigned flags)
-{
- SetPagePinned(page);
- return 0;
-}
-
-void __init xen_mark_init_mm_pinned(void)
-{
- pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
-}
-
-static int unpin_page(struct page *page, unsigned flags)
-{
- unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags);
-
- if (pgfl && !PageHighMem(page)) {
- void *pt = lowmem_page_address(page);
- unsigned long pfn = page_to_pfn(page);
- struct multicall_space mcs = __xen_mc_entry(0);
-
- MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
- pfn_pte(pfn, PAGE_KERNEL),
- flags);
- }
-
- return 0; /* never need to flush on unpin */
-}
-
-/* Release a pagetables pages back as normal RW */
-static void xen_pgd_unpin(pgd_t *pgd)
-{
- struct mmuext_op *op;
- struct multicall_space mcs;
-
- xen_mc_batch();
-
- mcs = __xen_mc_entry(sizeof(*op));
-
- op = mcs.args;
- op->cmd = MMUEXT_UNPIN_TABLE;
- op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
-
- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
-
- pgd_walk(pgd, unpin_page, TASK_SIZE);
-
- xen_mc_issue(0);
-}
-
-void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
-{
- spin_lock(&next->page_table_lock);
- xen_pgd_pin(next->pgd);
- spin_unlock(&next->page_table_lock);
-}
-
-void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
-{
- spin_lock(&mm->page_table_lock);
- xen_pgd_pin(mm->pgd);
- spin_unlock(&mm->page_table_lock);
-}
-
-
-#ifdef CONFIG_SMP
-/* Another cpu may still have their %cr3 pointing at the pagetable, so
- we need to repoint it somewhere else before we can unpin it. */
-static void drop_other_mm_ref(void *info)
-{
- struct mm_struct *mm = info;
-
- if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
- leave_mm(smp_processor_id());
-}
-
-static void drop_mm_ref(struct mm_struct *mm)
-{
- if (current->active_mm == mm) {
- if (current->mm == mm)
- load_cr3(swapper_pg_dir);
- else
- leave_mm(smp_processor_id());
- }
-
- if (!cpus_empty(mm->cpu_vm_mask))
- xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref,
- mm, 1);
-}
-#else
-static void drop_mm_ref(struct mm_struct *mm)
-{
- if (current->active_mm == mm)
- load_cr3(swapper_pg_dir);
-}
-#endif
-
-/*
- * While a process runs, Xen pins its pagetables, which means that the
- * hypervisor forces it to be read-only, and it controls all updates
- * to it. This means that all pagetable updates have to go via the
- * hypervisor, which is moderately expensive.
- *
- * Since we're pulling the pagetable down, we switch to use init_mm,
- * unpin old process pagetable and mark it all read-write, which
- * allows further operations on it to be simple memory accesses.
- *
- * The only subtle point is that another CPU may be still using the
- * pagetable because of lazy tlb flushing. This means we need need to
- * switch all CPUs off this pagetable before we can unpin it.
- */
-void xen_exit_mmap(struct mm_struct *mm)
-{
- get_cpu(); /* make sure we don't move around */
- drop_mm_ref(mm);
- put_cpu();
-
- spin_lock(&mm->page_table_lock);
-
- /* pgd may not be pinned in the error exit path of execve */
- if (PagePinned(virt_to_page(mm->pgd)))
- xen_pgd_unpin(mm->pgd);
- spin_unlock(&mm->page_table_lock);
-}
diff --git a/arch/i386/xen/mmu.h b/arch/i386/xen/mmu.h
deleted file mode 100644
index c9ff27f3ac3..00000000000
--- a/arch/i386/xen/mmu.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _XEN_MMU_H
-
-#include <linux/linkage.h>
-#include <asm/page.h>
-
-/*
- * Page-directory addresses above 4GB do not fit into architectural %cr3.
- * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
- * must use the following accessor macros to pack/unpack valid MFNs.
- *
- * Note that Xen is using the fact that the pagetable base is always
- * page-aligned, and putting the 12 MSB of the address into the 12 LSB
- * of cr3.
- */
-#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
-#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
-
-
-void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-
-void xen_set_pte(pte_t *ptep, pte_t pteval);
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval);
-void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
-
-void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
-void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
-void xen_exit_mmap(struct mm_struct *mm);
-
-void xen_pgd_pin(pgd_t *pgd);
-//void xen_pgd_unpin(pgd_t *pgd);
-
-#ifdef CONFIG_X86_PAE
-unsigned long long xen_pte_val(pte_t);
-unsigned long long xen_pmd_val(pmd_t);
-unsigned long long xen_pgd_val(pgd_t);
-
-pte_t xen_make_pte(unsigned long long);
-pmd_t xen_make_pmd(unsigned long long);
-pgd_t xen_make_pgd(unsigned long long);
-
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval);
-void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
-void xen_set_pud(pud_t *ptr, pud_t val);
-void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-void xen_pmd_clear(pmd_t *pmdp);
-
-
-#else
-unsigned long xen_pte_val(pte_t);
-unsigned long xen_pmd_val(pmd_t);
-unsigned long xen_pgd_val(pgd_t);
-
-pte_t xen_make_pte(unsigned long);
-pmd_t xen_make_pmd(unsigned long);
-pgd_t xen_make_pgd(unsigned long);
-#endif
-
-#endif /* _XEN_MMU_H */
diff --git a/arch/i386/xen/multicalls.c b/arch/i386/xen/multicalls.c
deleted file mode 100644
index c837e8e463d..00000000000
--- a/arch/i386/xen/multicalls.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Xen hypercall batching.
- *
- * Xen allows multiple hypercalls to be issued at once, using the
- * multicall interface. This allows the cost of trapping into the
- * hypervisor to be amortized over several calls.
- *
- * This file implements a simple interface for multicalls. There's a
- * per-cpu buffer of outstanding multicalls. When you want to queue a
- * multicall for issuing, you can allocate a multicall slot for the
- * call and its arguments, along with storage for space which is
- * pointed to by the arguments (for passing pointers to structures,
- * etc). When the multicall is actually issued, all the space for the
- * commands and allocated memory is freed for reuse.
- *
- * Multicalls are flushed whenever any of the buffers get full, or
- * when explicitly requested. There's no way to get per-multicall
- * return results back. It will BUG if any of the multicalls fail.
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-#include <linux/percpu.h>
-#include <linux/hardirq.h>
-
-#include <asm/xen/hypercall.h>
-
-#include "multicalls.h"
-
-#define MC_BATCH 32
-#define MC_ARGS (MC_BATCH * 16 / sizeof(u64))
-
-struct mc_buffer {
- struct multicall_entry entries[MC_BATCH];
- u64 args[MC_ARGS];
- unsigned mcidx, argidx;
-};
-
-static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
-DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
-
-void xen_mc_flush(void)
-{
- struct mc_buffer *b = &__get_cpu_var(mc_buffer);
- int ret = 0;
- unsigned long flags;
-
- BUG_ON(preemptible());
-
- /* Disable interrupts in case someone comes in and queues
- something in the middle */
- local_irq_save(flags);
-
- if (b->mcidx) {
- int i;
-
- if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
- BUG();
- for (i = 0; i < b->mcidx; i++)
- if (b->entries[i].result < 0)
- ret++;
- b->mcidx = 0;
- b->argidx = 0;
- } else
- BUG_ON(b->argidx != 0);
-
- local_irq_restore(flags);
-
- BUG_ON(ret);
-}
-
-struct multicall_space __xen_mc_entry(size_t args)
-{
- struct mc_buffer *b = &__get_cpu_var(mc_buffer);
- struct multicall_space ret;
- unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);
-
- BUG_ON(preemptible());
- BUG_ON(argspace > MC_ARGS);
-
- if (b->mcidx == MC_BATCH ||
- (b->argidx + argspace) > MC_ARGS)
- xen_mc_flush();
-
- ret.mc = &b->entries[b->mcidx];
- b->mcidx++;
- ret.args = &b->args[b->argidx];
- b->argidx += argspace;
-
- return ret;
-}
diff --git a/arch/i386/xen/multicalls.h b/arch/i386/xen/multicalls.h
deleted file mode 100644
index e6f7530b156..00000000000
--- a/arch/i386/xen/multicalls.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef _XEN_MULTICALLS_H
-#define _XEN_MULTICALLS_H
-
-#include "xen-ops.h"
-
-/* Multicalls */
-struct multicall_space
-{
- struct multicall_entry *mc;
- void *args;
-};
-
-/* Allocate room for a multicall and its args */
-struct multicall_space __xen_mc_entry(size_t args);
-
-DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags);
-
-/* Call to start a batch of multiple __xen_mc_entry()s. Must be
- paired with xen_mc_issue() */
-static inline void xen_mc_batch(void)
-{
- /* need to disable interrupts until this entry is complete */
- local_irq_save(__get_cpu_var(xen_mc_irq_flags));
-}
-
-static inline struct multicall_space xen_mc_entry(size_t args)
-{
- xen_mc_batch();
- return __xen_mc_entry(args);
-}
-
-/* Flush all pending multicalls */
-void xen_mc_flush(void);
-
-/* Issue a multicall if we're not in a lazy mode */
-static inline void xen_mc_issue(unsigned mode)
-{
- if ((xen_get_lazy_mode() & mode) == 0)
- xen_mc_flush();
-
- /* restore flags saved in xen_mc_batch */
- local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
-}
-
-#endif /* _XEN_MULTICALLS_H */
diff --git a/arch/i386/xen/setup.c b/arch/i386/xen/setup.c
deleted file mode 100644
index f84e7722664..00000000000
--- a/arch/i386/xen/setup.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Machine specific setup for xen
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/pm.h>
-
-#include <asm/elf.h>
-#include <asm/e820.h>
-#include <asm/setup.h>
-#include <asm/xen/hypervisor.h>
-#include <asm/xen/hypercall.h>
-
-#include <xen/interface/physdev.h>
-#include <xen/features.h>
-
-#include "xen-ops.h"
-#include "vdso.h"
-
-/* These are code, but not functions. Defined in entry.S */
-extern const char xen_hypervisor_callback[];
-extern const char xen_failsafe_callback[];
-
-unsigned long *phys_to_machine_mapping;
-EXPORT_SYMBOL(phys_to_machine_mapping);
-
-/**
- * machine_specific_memory_setup - Hook for machine specific memory setup.
- **/
-
-char * __init xen_memory_setup(void)
-{
- unsigned long max_pfn = xen_start_info->nr_pages;
-
- e820.nr_map = 0;
- add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM);
-
- return "Xen";
-}
-
-static void xen_idle(void)
-{
- local_irq_disable();
-
- if (need_resched())
- local_irq_enable();
- else {
- current_thread_info()->status &= ~TS_POLLING;
- smp_mb__after_clear_bit();
- safe_halt();
- current_thread_info()->status |= TS_POLLING;
- }
-}
-
-/*
- * Set the bit indicating "nosegneg" library variants should be used.
- */
-static void fiddle_vdso(void)
-{
- extern u32 VDSO_NOTE_MASK; /* See ../kernel/vsyscall-note.S. */
- extern char vsyscall_int80_start;
- u32 *mask = (u32 *) ((unsigned long) &VDSO_NOTE_MASK - VDSO_PRELINK +
- &vsyscall_int80_start);
- *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
-}
-
-void __init xen_arch_setup(void)
-{
- struct physdev_set_iopl set_iopl;
- int rc;
-
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
-
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
-
- HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
- __KERNEL_CS, (unsigned long)xen_failsafe_callback);
-
- set_iopl.iopl = 1;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
- if (rc != 0)
- printk(KERN_INFO "physdev_op failed %d\n", rc);
-
-#ifdef CONFIG_ACPI
- if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
- printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
- disable_acpi();
- }
-#endif
-
- memcpy(boot_command_line, xen_start_info->cmd_line,
- MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
- COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
-
- pm_idle = xen_idle;
-
-#ifdef CONFIG_SMP
- /* fill cpus_possible with all available cpus */
- xen_fill_possible_map();
-#endif
-
- paravirt_disable_iospace();
-
- fiddle_vdso();
-}
diff --git a/arch/i386/xen/smp.c b/arch/i386/xen/smp.c
deleted file mode 100644
index 557b8e24706..00000000000
--- a/arch/i386/xen/smp.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * Xen SMP support
- *
- * This file implements the Xen versions of smp_ops. SMP under Xen is
- * very straightforward. Bringing a CPU up is simply a matter of
- * loading its initial context and setting it running.
- *
- * IPIs are handled through the Xen event mechanism.
- *
- * Because virtual CPUs can be scheduled onto any real CPU, there's no
- * useful topology information for the kernel to make use of. As a
- * result, all CPUs are treated as if they're single-core and
- * single-threaded.
- *
- * This does not handle HOTPLUG_CPU yet.
- */
-#include <linux/sched.h>
-#include <linux/err.h>
-#include <linux/smp.h>
-
-#include <asm/paravirt.h>
-#include <asm/desc.h>
-#include <asm/pgtable.h>
-#include <asm/cpu.h>
-
-#include <xen/interface/xen.h>
-#include <xen/interface/vcpu.h>
-
-#include <asm/xen/interface.h>
-#include <asm/xen/hypercall.h>
-
-#include <xen/page.h>
-#include <xen/events.h>
-
-#include "xen-ops.h"
-#include "mmu.h"
-
-static cpumask_t cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq);
-static DEFINE_PER_CPU(int, callfunc_irq);
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
-static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
-
-static struct call_data_struct *call_data;
-
-/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
- */
-static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
-{
- return IRQ_HANDLED;
-}
-
-static __cpuinit void cpu_bringup_and_idle(void)
-{
- int cpu = smp_processor_id();
-
- cpu_init();
-
- preempt_disable();
- per_cpu(cpu_state, cpu) = CPU_ONLINE;
-
- xen_setup_cpu_clockevents();
-
- /* We can take interrupts now: we're officially "up". */
- local_irq_enable();
-
- wmb(); /* make sure everything is out */
- cpu_idle();
-}
-
-static int xen_smp_intr_init(unsigned int cpu)
-{
- int rc;
- const char *resched_name, *callfunc_name;
-
- per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
-
- resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
- rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
- cpu,
- xen_reschedule_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
- resched_name,
- NULL);
- if (rc < 0)
- goto fail;
- per_cpu(resched_irq, cpu) = rc;
-
- callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
- rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
- cpu,
- xen_call_function_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
- callfunc_name,
- NULL);
- if (rc < 0)
- goto fail;
- per_cpu(callfunc_irq, cpu) = rc;
-
- return 0;
-
- fail:
- if (per_cpu(resched_irq, cpu) >= 0)
- unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
- if (per_cpu(callfunc_irq, cpu) >= 0)
- unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
- return rc;
-}
-
-void __init xen_fill_possible_map(void)
-{
- int i, rc;
-
- for (i = 0; i < NR_CPUS; i++) {
- rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0)
- cpu_set(i, cpu_possible_map);
- }
-}
-
-void __init xen_smp_prepare_boot_cpu(void)
-{
- int cpu;
-
- BUG_ON(smp_processor_id() != 0);
- native_smp_prepare_boot_cpu();
-
- /* We've switched to the "real" per-cpu gdt, so make sure the
- old memory can be recycled */
- make_lowmem_page_readwrite(&per_cpu__gdt_page);
-
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
- }
-
- xen_setup_vcpu_info_placement();
-}
-
-void __init xen_smp_prepare_cpus(unsigned int max_cpus)
-{
- unsigned cpu;
-
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
- }
-
- smp_store_cpu_info(0);
- set_cpu_sibling_map(0);
-
- if (xen_smp_intr_init(0))
- BUG();
-
- cpu_initialized_map = cpumask_of_cpu(0);
-
- /* Restrict the possible_map according to max_cpus. */
- while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
- for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
- continue;
- cpu_clear(cpu, cpu_possible_map);
- }
-
- for_each_possible_cpu (cpu) {
- struct task_struct *idle;
-
- if (cpu == 0)
- continue;
-
- idle = fork_idle(cpu);
- if (IS_ERR(idle))
- panic("failed fork for CPU %d", cpu);
-
- cpu_set(cpu, cpu_present_map);
- }
-
- //init_xenbus_allowed_cpumask();
-}
-
-static __cpuinit int
-cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
-{
- struct vcpu_guest_context *ctxt;
- struct gdt_page *gdt = &per_cpu(gdt_page, cpu);
-
- if (cpu_test_and_set(cpu, cpu_initialized_map))
- return 0;
-
- ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
- if (ctxt == NULL)
- return -ENOMEM;
-
- ctxt->flags = VGCF_IN_KERNEL;
- ctxt->user_regs.ds = __USER_DS;
- ctxt->user_regs.es = __USER_DS;
- ctxt->user_regs.fs = __KERNEL_PERCPU;
- ctxt->user_regs.gs = 0;
- ctxt->user_regs.ss = __KERNEL_DS;
- ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
- ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
-
- memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
-
- xen_copy_trap_info(ctxt->trap_ctxt);
-
- ctxt->ldt_ents = 0;
-
- BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
- make_lowmem_page_readonly(gdt->gdt);
-
- ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
- ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt);
-
- ctxt->user_regs.cs = __KERNEL_CS;
- ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
-
- ctxt->kernel_ss = __KERNEL_DS;
- ctxt->kernel_sp = idle->thread.esp0;
-
- ctxt->event_callback_cs = __KERNEL_CS;
- ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
- ctxt->failsafe_callback_cs = __KERNEL_CS;
- ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
-
- per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
- ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
-
- if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
- BUG();
-
- kfree(ctxt);
- return 0;
-}
-
-int __cpuinit xen_cpu_up(unsigned int cpu)
-{
- struct task_struct *idle = idle_task(cpu);
- int rc;
-
-#if 0
- rc = cpu_up_check(cpu);
- if (rc)
- return rc;
-#endif
-
- init_gdt(cpu);
- per_cpu(current_task, cpu) = idle;
- irq_ctx_init(cpu);
- xen_setup_timer(cpu);
-
- /* make sure interrupts start blocked */
- per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
-
- rc = cpu_initialize_context(cpu, idle);
- if (rc)
- return rc;
-
- if (num_online_cpus() == 1)
- alternatives_smp_switch(1);
-
- rc = xen_smp_intr_init(cpu);
- if (rc)
- return rc;
-
- smp_store_cpu_info(cpu);
- set_cpu_sibling_map(cpu);
- /* This must be done before setting cpu_online_map */
- wmb();
-
- cpu_set(cpu, cpu_online_map);
-
- rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
- BUG_ON(rc);
-
- return 0;
-}
-
-void xen_smp_cpus_done(unsigned int max_cpus)
-{
-}
-
-static void stop_self(void *v)
-{
- int cpu = smp_processor_id();
-
- /* make sure we're not pinning something down */
- load_cr3(swapper_pg_dir);
- /* should set up a minimal gdt */
-
- HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
- BUG();
-}
-
-void xen_smp_send_stop(void)
-{
- smp_call_function(stop_self, NULL, 0, 0);
-}
-
-void xen_smp_send_reschedule(int cpu)
-{
- xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
-}
-
-
-static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
-{
- unsigned cpu;
-
- cpus_and(mask, mask, cpu_online_map);
-
- for_each_cpu_mask(cpu, mask)
- xen_send_IPI_one(cpu, vector);
-}
-
-static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
-{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- irq_enter();
- (*func)(info);
- irq_exit();
-
- if (wait) {
- mb(); /* commit everything before setting finished */
- atomic_inc(&call_data->finished);
- }
-
- return IRQ_HANDLED;
-}
-
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
- void *info, int wait)
-{
- struct call_data_struct data;
- int cpus;
-
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
-
- cpu_clear(smp_processor_id(), mask);
-
- cpus = cpus_weight(mask);
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb(); /* write everything before IPI */
-
- /* Send a message to other CPUs and wait for them to respond */
- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
-
- /* Make sure other vcpus get a chance to run.
- XXX too severe? Maybe we should check the other CPU's states? */
- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus ||
- (wait && atomic_read(&data.finished) != cpus))
- cpu_relax();
-
- spin_unlock(&call_lock);
-
- return 0;
-}
diff --git a/arch/i386/xen/time.c b/arch/i386/xen/time.c
deleted file mode 100644
index dfd6db69ead..00000000000
--- a/arch/i386/xen/time.c
+++ /dev/null
@@ -1,593 +0,0 @@
-/*
- * Xen time implementation.
- *
- * This is implemented in terms of a clocksource driver which uses
- * the hypervisor clock as a nanosecond timebase, and a clockevent
- * driver which uses the hypervisor's timer mechanism.
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/kernel_stat.h>
-
-#include <asm/xen/hypervisor.h>
-#include <asm/xen/hypercall.h>
-
-#include <xen/events.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/vcpu.h>
-
-#include "xen-ops.h"
-
-#define XEN_SHIFT 22
-
-/* Xen may fire a timer up to this many ns early */
-#define TIMER_SLOP 100000
-#define NS_PER_TICK (1000000000LL / HZ)
-
-static cycle_t xen_clocksource_read(void);
-
-/* These are perodically updated in shared_info, and then copied here. */
-struct shadow_time_info {
- u64 tsc_timestamp; /* TSC at last update of time vals. */
- u64 system_timestamp; /* Time, in nanosecs, since boot. */
- u32 tsc_to_nsec_mul;
- int tsc_shift;
- u32 version;
-};
-
-static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
-
-/* runstate info updated by Xen */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
-
-/* snapshots of runstate info */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);
-
-/* unused ns of stolen and blocked time */
-static DEFINE_PER_CPU(u64, residual_stolen);
-static DEFINE_PER_CPU(u64, residual_blocked);
-
-/* return an consistent snapshot of 64-bit time/counter value */
-static u64 get64(const u64 *p)
-{
- u64 ret;
-
- if (BITS_PER_LONG < 64) {
- u32 *p32 = (u32 *)p;
- u32 h, l;
-
- /*
- * Read high then low, and then make sure high is
- * still the same; this will only loop if low wraps
- * and carries into high.
- * XXX some clean way to make this endian-proof?
- */
- do {
- h = p32[1];
- barrier();
- l = p32[0];
- barrier();
- } while (p32[1] != h);
-
- ret = (((u64)h) << 32) | l;
- } else
- ret = *p;
-
- return ret;
-}
-
-/*
- * Runstate accounting
- */
-static void get_runstate_snapshot(struct vcpu_runstate_info *res)
-{
- u64 state_time;
- struct vcpu_runstate_info *state;
-
- BUG_ON(preemptible());
-
- state = &__get_cpu_var(runstate);
-
- /*
- * The runstate info is always updated by the hypervisor on
- * the current CPU, so there's no need to use anything
- * stronger than a compiler barrier when fetching it.
- */
- do {
- state_time = get64(&state->state_entry_time);
- barrier();
- *res = *state;
- barrier();
- } while (get64(&state->state_entry_time) != state_time);
-}
-
-static void setup_runstate_info(int cpu)
-{
- struct vcpu_register_runstate_memory_area area;
-
- area.addr.v = &per_cpu(runstate, cpu);
-
- if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
- cpu, &area))
- BUG();
-}
-
-static void do_stolen_accounting(void)
-{
- struct vcpu_runstate_info state;
- struct vcpu_runstate_info *snap;
- s64 blocked, runnable, offline, stolen;
- cputime_t ticks;
-
- get_runstate_snapshot(&state);
-
- WARN_ON(state.state != RUNSTATE_running);
-
- snap = &__get_cpu_var(runstate_snapshot);
-
- /* work out how much time the VCPU has not been runn*ing* */
- blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
- runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
- offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
-
- *snap = state;
-
- /* Add the appropriate number of ticks of stolen time,
- including any left-overs from last time. Passing NULL to
- account_steal_time accounts the time as stolen. */
- stolen = runnable + offline + __get_cpu_var(residual_stolen);
-
- if (stolen < 0)
- stolen = 0;
-
- ticks = 0;
- while (stolen >= NS_PER_TICK) {
- ticks++;
- stolen -= NS_PER_TICK;
- }
- __get_cpu_var(residual_stolen) = stolen;
- account_steal_time(NULL, ticks);
-
- /* Add the appropriate number of ticks of blocked time,
- including any left-overs from last time. Passing idle to
- account_steal_time accounts the time as idle/wait. */
- blocked += __get_cpu_var(residual_blocked);
-
- if (blocked < 0)
- blocked = 0;
-
- ticks = 0;
- while (blocked >= NS_PER_TICK) {
- ticks++;
- blocked -= NS_PER_TICK;
- }
- __get_cpu_var(residual_blocked) = blocked;
- account_steal_time(idle_task(smp_processor_id()), ticks);
-}
-
-/*
- * Xen sched_clock implementation. Returns the number of unstolen
- * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
- * states.
- */
-unsigned long long xen_sched_clock(void)
-{
- struct vcpu_runstate_info state;
- cycle_t now;
- u64 ret;
- s64 offset;
-
- /*
- * Ideally sched_clock should be called on a per-cpu basis
- * anyway, so preempt should already be disabled, but that's
- * not current practice at the moment.
- */
- preempt_disable();
-
- now = xen_clocksource_read();
-
- get_runstate_snapshot(&state);
-
- WARN_ON(state.state != RUNSTATE_running);
-
- offset = now - state.state_entry_time;
- if (offset < 0)
- offset = 0;
-
- ret = state.time[RUNSTATE_blocked] +
- state.time[RUNSTATE_running] +
- offset;
-
- preempt_enable();
-
- return ret;
-}
-
-
-/* Get the CPU speed from Xen */
-unsigned long xen_cpu_khz(void)
-{
- u64 cpu_khz = 1000000ULL << 32;
- const struct vcpu_time_info *info =
- &HYPERVISOR_shared_info->vcpu_info[0].time;
-
- do_div(cpu_khz, info->tsc_to_system_mul);
- if (info->tsc_shift < 0)
- cpu_khz <<= -info->tsc_shift;
- else
- cpu_khz >>= info->tsc_shift;
-
- return cpu_khz;
-}
-
-/*
- * Reads a consistent set of time-base values from Xen, into a shadow data
- * area.
- */
-static unsigned get_time_values_from_xen(void)
-{
- struct vcpu_time_info *src;
- struct shadow_time_info *dst;
-
- /* src is shared memory with the hypervisor, so we need to
- make sure we get a consistent snapshot, even in the face of
- being preempted. */
- src = &__get_cpu_var(xen_vcpu)->time;
- dst = &__get_cpu_var(shadow_time);
-
- do {
- dst->version = src->version;
- rmb(); /* fetch version before data */
- dst->tsc_timestamp = src->tsc_timestamp;
- dst->system_timestamp = src->system_time;
- dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
- dst->tsc_shift = src->tsc_shift;
- rmb(); /* test version after fetching data */
- } while ((src->version & 1) | (dst->version ^ src->version));
-
- return dst->version;
-}
-
-/*
- * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
- * yielding a 64-bit result.
- */
-static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
-{
- u64 product;
-#ifdef __i386__
- u32 tmp1, tmp2;
-#endif
-
- if (shift < 0)
- delta >>= -shift;
- else
- delta <<= shift;
-
-#ifdef __i386__
- __asm__ (
- "mul %5 ; "
- "mov %4,%%eax ; "
- "mov %%edx,%4 ; "
- "mul %5 ; "
- "xor %5,%5 ; "
- "add %4,%%eax ; "
- "adc %5,%%edx ; "
- : "=A" (product), "=r" (tmp1), "=r" (tmp2)
- : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
-#elif __x86_64__
- __asm__ (
- "mul %%rdx ; shrd $32,%%rdx,%%rax"
- : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
-#else
-#error implement me!
-#endif
-
- return product;
-}
-
-static u64 get_nsec_offset(struct shadow_time_info *shadow)
-{
- u64 now, delta;
- now = native_read_tsc();
- delta = now - shadow->tsc_timestamp;
- return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
-}
-
-static cycle_t xen_clocksource_read(void)
-{
- struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
- cycle_t ret;
- unsigned version;
-
- do {
- version = get_time_values_from_xen();
- barrier();
- ret = shadow->system_timestamp + get_nsec_offset(shadow);
- barrier();
- } while (version != __get_cpu_var(xen_vcpu)->time.version);
-
- put_cpu_var(shadow_time);
-
- return ret;
-}
-
-static void xen_read_wallclock(struct timespec *ts)
-{
- const struct shared_info *s = HYPERVISOR_shared_info;
- u32 version;
- u64 delta;
- struct timespec now;
-
- /* get wallclock at system boot */
- do {
- version = s->wc_version;
- rmb(); /* fetch version before time */
- now.tv_sec = s->wc_sec;
- now.tv_nsec = s->wc_nsec;
- rmb(); /* fetch time before checking version */
- } while ((s->wc_version & 1) | (version ^ s->wc_version));
-
- delta = xen_clocksource_read(); /* time since system boot */
- delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
-
- now.tv_nsec = do_div(delta, NSEC_PER_SEC);
- now.tv_sec = delta;
-
- set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
-}
-
-unsigned long xen_get_wallclock(void)
-{
- struct timespec ts;
-
- xen_read_wallclock(&ts);
-
- return ts.tv_sec;
-}
-
-int xen_set_wallclock(unsigned long now)
-{
- /* do nothing for domU */
- return -1;
-}
-
-static struct clocksource xen_clocksource __read_mostly = {
- .name = "xen",
- .rating = 400,
- .read = xen_clocksource_read,
- .mask = ~0,
- .mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */
- .shift = XEN_SHIFT,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-/*
- Xen clockevent implementation
-
- Xen has two clockevent implementations:
-
- The old timer_op one works with all released versions of Xen prior
- to version 3.0.4. This version of the hypervisor provides a
- single-shot timer with nanosecond resolution. However, sharing the
- same event channel is a 100Hz tick which is delivered while the
- vcpu is running. We don't care about or use this tick, but it will
- cause the core time code to think the timer fired too soon, and
- will end up resetting it each time. It could be filtered, but
- doing so has complications when the ktime clocksource is not yet
- the xen clocksource (ie, at boot time).
-
- The new vcpu_op-based timer interface allows the tick timer period
- to be changed or turned off. The tick timer is not useful as a
- periodic timer because events are only delivered to running vcpus.
- The one-shot timer can report when a timeout is in the past, so
- set_next_event is capable of returning -ETIME when appropriate.
- This interface is used when available.
-*/
-
-
-/*
- Get a hypervisor absolute time. In theory we could maintain an
- offset between the kernel's time and the hypervisor's time, and
- apply that to a kernel's absolute timeout. Unfortunately the
- hypervisor and kernel times can drift even if the kernel is using
- the Xen clocksource, because ntp can warp the kernel's clocksource.
-*/
-static s64 get_abs_timeout(unsigned long delta)
-{
- return xen_clocksource_read() + delta;
-}
-
-static void xen_timerop_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
- switch (mode) {
- case CLOCK_EVT_MODE_PERIODIC:
- /* unsupported */
- WARN_ON(1);
- break;
-
- case CLOCK_EVT_MODE_ONESHOT:
- case CLOCK_EVT_MODE_RESUME:
- break;
-
- case CLOCK_EVT_MODE_UNUSED:
- case CLOCK_EVT_MODE_SHUTDOWN:
- HYPERVISOR_set_timer_op(0); /* cancel timeout */
- break;
- }
-}
-
-static int xen_timerop_set_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
-
- if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
- BUG();
-
- /* We may have missed the deadline, but there's no real way of
- knowing for sure. If the event was in the past, then we'll
- get an immediate interrupt. */
-
- return 0;
-}
-
-static const struct clock_event_device xen_timerop_clockevent = {
- .name = "xen",
- .features = CLOCK_EVT_FEAT_ONESHOT,
-
- .max_delta_ns = 0xffffffff,
- .min_delta_ns = TIMER_SLOP,
-
- .mult = 1,
- .shift = 0,
- .rating = 500,
-
- .set_mode = xen_timerop_set_mode,
- .set_next_event = xen_timerop_set_next_event,
-};
-
-
-
-static void xen_vcpuop_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
- int cpu = smp_processor_id();
-
- switch (mode) {
- case CLOCK_EVT_MODE_PERIODIC:
- WARN_ON(1); /* unsupported */
- break;
-
- case CLOCK_EVT_MODE_ONESHOT:
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
- BUG();
- break;
-
- case CLOCK_EVT_MODE_UNUSED:
- case CLOCK_EVT_MODE_SHUTDOWN:
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
- HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
- BUG();
- break;
- case CLOCK_EVT_MODE_RESUME:
- break;
- }
-}
-
-static int xen_vcpuop_set_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- int cpu = smp_processor_id();
- struct vcpu_set_singleshot_timer single;
- int ret;
-
- WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
-
- single.timeout_abs_ns = get_abs_timeout(delta);
- single.flags = VCPU_SSHOTTMR_future;
-
- ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);
-
- BUG_ON(ret != 0 && ret != -ETIME);
-
- return ret;
-}
-
-static const struct clock_event_device xen_vcpuop_clockevent = {
- .name = "xen",
- .features = CLOCK_EVT_FEAT_ONESHOT,
-
- .max_delta_ns = 0xffffffff,
- .min_delta_ns = TIMER_SLOP,
-
- .mult = 1,
- .shift = 0,
- .rating = 500,
-
- .set_mode = xen_vcpuop_set_mode,
- .set_next_event = xen_vcpuop_set_next_event,
-};
-
-static const struct clock_event_device *xen_clockevent =
- &xen_timerop_clockevent;
-static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);
-
-static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
-{
- struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
- irqreturn_t ret;
-
- ret = IRQ_NONE;
- if (evt->event_handler) {
- evt->event_handler(evt);
- ret = IRQ_HANDLED;
- }
-
- do_stolen_accounting();
-
- return ret;
-}
-
-void xen_setup_timer(int cpu)
-{
- const char *name;
- struct clock_event_device *evt;
- int irq;
-
- printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
-
- name = kasprintf(GFP_KERNEL, "timer%d", cpu);
- if (!name)
- name = "<timer kasprintf failed>";
-
- irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
- name, NULL);
-
- evt = &per_cpu(xen_clock_events, cpu);
- memcpy(evt, xen_clockevent, sizeof(*evt));
-
- evt->cpumask = cpumask_of_cpu(cpu);
- evt->irq = irq;
-
- setup_runstate_info(cpu);
-}
-
-void xen_setup_cpu_clockevents(void)
-{
- BUG_ON(preemptible());
-
- clockevents_register_device(&__get_cpu_var(xen_clock_events));
-}
-
-__init void xen_time_init(void)
-{
- int cpu = smp_processor_id();
-
- get_time_values_from_xen();
-
- clocksource_register(&xen_clocksource);
-
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
- /* Successfully turned off 100Hz tick, so we have the
- vcpuop-based timer interface */
- printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
- xen_clockevent = &xen_vcpuop_clockevent;
- }
-
- /* Set initial system time with full resolution */
- xen_read_wallclock(&xtime);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
- tsc_disable = 0;
-
- xen_setup_timer(cpu);
- xen_setup_cpu_clockevents();
-}
diff --git a/arch/i386/xen/vdso.h b/arch/i386/xen/vdso.h
deleted file mode 100644
index 861fedfe523..00000000000
--- a/arch/i386/xen/vdso.h
+++ /dev/null
@@ -1,4 +0,0 @@
-/* Bit used for the pseudo-hwcap for non-negative segments. We use
- bit 1 to avoid bugs in some versions of glibc when bit 0 is
- used; the choice is otherwise arbitrary. */
-#define VDSO_NOTE_NONEGSEG_BIT 1
diff --git a/arch/i386/xen/xen-asm.S b/arch/i386/xen/xen-asm.S
deleted file mode 100644
index 1a43b60c0c6..00000000000
--- a/arch/i386/xen/xen-asm.S
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- Asm versions of Xen pv-ops, suitable for either direct use or inlining.
- The inline versions are the same as the direct-use versions, with the
- pre- and post-amble chopped off.
-
- This code is encoded for size rather than absolute efficiency,
- with a view to being able to inline as much as possible.
-
- We only bother with direct forms (ie, vcpu in pda) of the operations
- here; the indirect forms are better handled in C, since they're
- generally too large to inline anyway.
- */
-
-#include <linux/linkage.h>
-
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/percpu.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-
-#include <xen/interface/xen.h>
-
-#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
-#define ENDPATCH(x) .globl x##_end; x##_end=.
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI 0x80000000
-
-/*
- Enable events. This clears the event mask and tests the pending
- event status with one and operation. If there are pending
- events, then enter the hypervisor to get them handled.
- */
-ENTRY(xen_irq_enable_direct)
- /* Clear mask and test pending */
- andw $0x00ff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
- /* Preempt here doesn't matter because that will deal with
- any pending interrupts. The pending check may end up being
- run on the wrong CPU, but that doesn't hurt. */
- jz 1f
-2: call check_events
-1:
-ENDPATCH(xen_irq_enable_direct)
- ret
- ENDPROC(xen_irq_enable_direct)
- RELOC(xen_irq_enable_direct, 2b+1)
-
-
-/*
- Disabling events is simply a matter of making the event mask
- non-zero.
- */
-ENTRY(xen_irq_disable_direct)
- movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-ENDPATCH(xen_irq_disable_direct)
- ret
- ENDPROC(xen_irq_disable_direct)
- RELOC(xen_irq_disable_direct, 0)
-
-/*
- (xen_)save_fl is used to get the current interrupt enable status.
- Callers expect the status to be in X86_EFLAGS_IF, and other bits
- may be set in the return value. We take advantage of this by
- making sure that X86_EFLAGS_IF has the right value (and other bits
- in that byte are 0), but other bits in the return value are
- undefined. We need to toggle the state of the bit, because
- Xen and x86 use opposite senses (mask vs enable).
- */
-ENTRY(xen_save_fl_direct)
- testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
- setz %ah
- addb %ah,%ah
-ENDPATCH(xen_save_fl_direct)
- ret
- ENDPROC(xen_save_fl_direct)
- RELOC(xen_save_fl_direct, 0)
-
-
-/*
- In principle the caller should be passing us a value return
- from xen_save_fl_direct, but for robustness sake we test only
- the X86_EFLAGS_IF flag rather than the whole byte. After
- setting the interrupt mask state, it checks for unmasked
- pending events and enters the hypervisor to get them delivered
- if so.
- */
-ENTRY(xen_restore_fl_direct)
- testb $X86_EFLAGS_IF>>8, %ah
- setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
- /* Preempt here doesn't matter because that will deal with
- any pending interrupts. The pending check may end up being
- run on the wrong CPU, but that doesn't hurt. */
-
- /* check for unmasked and pending */
- cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
- jz 1f
-2: call check_events
-1:
-ENDPATCH(xen_restore_fl_direct)
- ret
- ENDPROC(xen_restore_fl_direct)
- RELOC(xen_restore_fl_direct, 2b+1)
-
-/*
- This is run where a normal iret would be run, with the same stack setup:
- 8: eflags
- 4: cs
- esp-> 0: eip
-
- This attempts to make sure that any pending events are dealt
- with on return to usermode, but there is a small window in
- which an event can happen just before entering usermode. If
- the nested interrupt ends up setting one of the TIF_WORK_MASK
- pending work flags, they will not be tested again before
- returning to usermode. This means that a process can end up
- with pending work, which will be unprocessed until the process
- enters and leaves the kernel again, which could be an
- unbounded amount of time. This means that a pending signal or
- reschedule event could be indefinitely delayed.
-
- The fix is to notice a nested interrupt in the critical
- window, and if one occurs, then fold the nested interrupt into
- the current interrupt stack frame, and re-process it
- iteratively rather than recursively. This means that it will
- exit via the normal path, and all pending work will be dealt
- with appropriately.
-
- Because the nested interrupt handler needs to deal with the
- current stack state in whatever form its in, we keep things
- simple by only using a single register which is pushed/popped
- on the stack.
-
- Non-direct iret could be done in the same way, but it would
- require an annoying amount of code duplication. We'll assume
- that direct mode will be the common case once the hypervisor
- support becomes commonplace.
- */
-ENTRY(xen_iret_direct)
- /* test eflags for special cases */
- testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
- jnz hyper_iret
-
- push %eax
- ESP_OFFSET=4 # bytes pushed onto stack
-
- /* Store vcpu_info pointer for easy access. Do it this
- way to avoid having to reload %fs */
-#ifdef CONFIG_SMP
- GET_THREAD_INFO(%eax)
- movl TI_cpu(%eax),%eax
- movl __per_cpu_offset(,%eax,4),%eax
- lea per_cpu__xen_vcpu_info(%eax),%eax
-#else
- movl $per_cpu__xen_vcpu_info, %eax
-#endif
-
- /* check IF state we're restoring */
- testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
-
- /* Maybe enable events. Once this happens we could get a
- recursive event, so the critical region starts immediately
- afterwards. However, if that happens we don't end up
- resuming the code, so we don't have to be worried about
- being preempted to another CPU. */
- setz XEN_vcpu_info_mask(%eax)
-xen_iret_start_crit:
-
- /* check for unmasked and pending */
- cmpw $0x0001, XEN_vcpu_info_pending(%eax)
-
- /* If there's something pending, mask events again so we
- can jump back into xen_hypervisor_callback */
- sete XEN_vcpu_info_mask(%eax)
-
- popl %eax
-
- /* From this point on the registers are restored and the stack
- updated, so we don't need to worry about it if we're preempted */
-iret_restore_end:
-
- /* Jump to hypervisor_callback after fixing up the stack.
- Events are masked, so jumping out of the critical
- region is OK. */
- je xen_hypervisor_callback
-
- iret
-xen_iret_end_crit:
-
-hyper_iret:
- /* put this out of line since its very rarely used */
- jmp hypercall_page + __HYPERVISOR_iret * 32
-
- .globl xen_iret_start_crit, xen_iret_end_crit
-
-/*
- This is called by xen_hypervisor_callback in entry.S when it sees
- that the EIP at the time of interrupt was between xen_iret_start_crit
- and xen_iret_end_crit. We're passed the EIP in %eax so we can do
- a more refined determination of what to do.
-
- The stack format at this point is:
- ----------------
- ss : (ss/esp may be present if we came from usermode)
- esp :
- eflags } outer exception info
- cs }
- eip }
- ---------------- <- edi (copy dest)
- eax : outer eax if it hasn't been restored
- ----------------
- eflags } nested exception info
- cs } (no ss/esp because we're nested
- eip } from the same ring)
- orig_eax }<- esi (copy src)
- - - - - - - - -
- fs }
- es }
- ds } SAVE_ALL state
- eax }
- : :
- ebx }
- ----------------
- return addr <- esp
- ----------------
-
- In order to deliver the nested exception properly, we need to shift
- everything from the return addr up to the error code so it
- sits just under the outer exception info. This means that when we
- handle the exception, we do it in the context of the outer exception
- rather than starting a new one.
-
- The only caveat is that if the outer eax hasn't been
- restored yet (ie, it's still on stack), we need to insert
- its value into the SAVE_ALL state before going on, since
- it's usermode state which we eventually need to restore.
- */
-ENTRY(xen_iret_crit_fixup)
- /* offsets +4 for return address */
-
- /*
- Paranoia: Make sure we're really coming from userspace.
- One could imagine a case where userspace jumps into the
- critical range address, but just before the CPU delivers a GP,
- it decides to deliver an interrupt instead. Unlikely?
- Definitely. Easy to avoid? Yes. The Intel documents
- explicitly say that the reported EIP for a bad jump is the
- jump instruction itself, not the destination, but some virtual
- environments get this wrong.
- */
- movl PT_CS+4(%esp), %ecx
- andl $SEGMENT_RPL_MASK, %ecx
- cmpl $USER_RPL, %ecx
- je 2f
-
- lea PT_ORIG_EAX+4(%esp), %esi
- lea PT_EFLAGS+4(%esp), %edi
-
- /* If eip is before iret_restore_end then stack
- hasn't been restored yet. */
- cmp $iret_restore_end, %eax
- jae 1f
-
- movl 0+4(%edi),%eax /* copy EAX */
- movl %eax, PT_EAX+4(%esp)
-
- lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */
-
- /* set up the copy */
-1: std
- mov $(PT_EIP+4) / 4, %ecx /* copy ret+saved regs up to orig_eax */
- rep movsl
- cld
-
- lea 4(%edi),%esp /* point esp to new frame */
-2: ret
-
-
-/*
- Force an event check by making a hypercall,
- but preserve regs before making the call.
- */
-check_events:
- push %eax
- push %ecx
- push %edx
- call force_evtchn_callback
- pop %edx
- pop %ecx
- pop %eax
- ret
diff --git a/arch/i386/xen/xen-head.S b/arch/i386/xen/xen-head.S
deleted file mode 100644
index f8d6937db2e..00000000000
--- a/arch/i386/xen/xen-head.S
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Xen-specific pieces of head.S, intended to be included in the right
- place in head.S */
-
-#ifdef CONFIG_XEN
-
-#include <linux/elfnote.h>
-#include <asm/boot.h>
-#include <xen/interface/elfnote.h>
-
-.pushsection .init.text
-ENTRY(startup_xen)
- movl %esi,xen_start_info
- cld
- movl $(init_thread_union+THREAD_SIZE),%esp
- jmp xen_start_kernel
-.popsection
-
-.pushsection .bss.page_aligned
- .align PAGE_SIZE_asm
-ENTRY(hypercall_page)
- .skip 0x1000
-.popsection
-
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
- ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
- ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET)
- ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen)
- ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page)
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
-#ifdef CONFIG_X86_PAE
- ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
-#else
- ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no")
-#endif
- ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
-
-#endif /*CONFIG_XEN */
diff --git a/arch/i386/xen/xen-ops.h b/arch/i386/xen/xen-ops.h
deleted file mode 100644
index b9aaea45f07..00000000000
--- a/arch/i386/xen/xen-ops.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#ifndef XEN_OPS_H
-#define XEN_OPS_H
-
-#include <linux/init.h>
-
-/* These are code, but not functions. Defined in entry.S */
-extern const char xen_hypervisor_callback[];
-extern const char xen_failsafe_callback[];
-
-void xen_copy_trap_info(struct trap_info *traps);
-
-DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
-DECLARE_PER_CPU(unsigned long, xen_cr3);
-
-extern struct start_info *xen_start_info;
-extern struct shared_info *HYPERVISOR_shared_info;
-
-char * __init xen_memory_setup(void);
-void __init xen_arch_setup(void);
-void __init xen_init_IRQ(void);
-
-void xen_setup_timer(int cpu);
-void xen_setup_cpu_clockevents(void);
-unsigned long xen_cpu_khz(void);
-void __init xen_time_init(void);
-unsigned long xen_get_wallclock(void);
-int xen_set_wallclock(unsigned long time);
-unsigned long long xen_sched_clock(void);
-
-void xen_mark_init_mm_pinned(void);
-
-DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
-
-static inline unsigned xen_get_lazy_mode(void)
-{
- return x86_read_percpu(xen_lazy_mode);
-}
-
-void __init xen_fill_possible_map(void);
-
-void __init xen_setup_vcpu_info_placement(void);
-void xen_smp_prepare_boot_cpu(void);
-void xen_smp_prepare_cpus(unsigned int max_cpus);
-int xen_cpu_up(unsigned int cpu);
-void xen_smp_cpus_done(unsigned int max_cpus);
-
-void xen_smp_send_stop(void);
-void xen_smp_send_reschedule(int cpu);
-int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait);
-int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait);
-
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
- void *info, int wait);
-
-
-/* Declare an asm function, along with symbols needed to make it
- inlineable */
-#define DECL_ASM(ret, name, ...) \
- ret name(__VA_ARGS__); \
- extern char name##_end[]; \
- extern char name##_reloc[] \
-
-DECL_ASM(void, xen_irq_enable_direct, void);
-DECL_ASM(void, xen_irq_disable_direct, void);
-DECL_ASM(unsigned long, xen_save_fl_direct, void);
-DECL_ASM(void, xen_restore_fl_direct, unsigned long);
-
-void xen_iret_direct(void);
-#endif /* XEN_OPS_H */