diff options
-rw-r--r-- | arch/x86/include/asm/xen/interface.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/interface_32.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/interface_64.h | 13 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/page.h | 7 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 19 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 14 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 11 | ||||
-rw-r--r-- | drivers/xen/Makefile | 5 | ||||
-rw-r--r-- | drivers/xen/balloon.c | 32 | ||||
-rw-r--r-- | drivers/xen/events.c | 13 | ||||
-rw-r--r-- | drivers/xen/evtchn.c | 100 | ||||
-rw-r--r-- | include/xen/interface/memory.h | 13 | ||||
-rw-r--r-- | include/xen/page.h | 7 |
13 files changed, 171 insertions, 74 deletions
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index e8506c1f0c5..1c10c88ee4e 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void); #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) #endif -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) /* Maximum number of virtual CPUs in multi-processor guests. */ #define MAX_VIRT_CPUS 32 diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index 42a7e004ae5..8413688b257 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h @@ -32,6 +32,11 @@ /* And the trap vector is... */ #define TRAP_INSTR "int $0x82" +#define __MACH2PHYS_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_END 0xF6800000 + +#define __MACH2PHYS_SHIFT 2 + /* * Virtual addresses beyond this are not modifiable by guest OSes. The * machine->physical mapping table starts at this address, read-only. diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index 100d2662b97..839a4811cf9 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h @@ -39,18 +39,7 @@ #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) -#endif - -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define __MACH2PHYS_SHIFT 3 /* * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index dd8c1414b3d..8760cc60a21 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -5,6 +5,7 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <linux/pfn.h> +#include <linux/mm.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -35,6 +36,8 @@ typedef struct xpaddr { #define MAX_DOMAIN_PAGES \ ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) +extern unsigned long *machine_to_phys_mapping; +extern unsigned int machine_to_phys_order; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); @@ -69,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; -#if 0 if (unlikely((mfn >> machine_to_phys_order) != 0)) - return max_mapnr; -#endif + return ~0; pfn = 0; /* diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 235c0f4d386..7250bef7f49 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); +unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; +EXPORT_SYMBOL(machine_to_phys_mapping); +unsigned int machine_to_phys_order; +EXPORT_SYMBOL(machine_to_phys_order); + struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -1090,6 +1095,8 @@ static void __init xen_setup_stackprotector(void) /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { + struct physdev_set_iopl set_iopl; + int rc; pgd_t *pgd; if (!xen_start_info) @@ -1097,6 +1104,8 @@ asmlinkage void __init xen_start_kernel(void) xen_domain_type = XEN_PV_DOMAIN; + xen_setup_machphys_mapping(); + /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; @@ -1202,10 +1211,18 @@ asmlinkage void __init xen_start_kernel(void) #else pv_info.kernel_rpl = 0; #endif - /* set the limit of our address space */ xen_reserve_top(); + /* We used to do this in xen_arch_setup, but that is too late on AMD + * were early_cpu_init (run before ->arch_setup()) calls early_amd_init + * which pokes 0xcf8 port. + */ + set_iopl.iopl = 1; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); + if (rc != 0) + xen_raw_printk("physdev_op failed %d\n", rc); + #ifdef CONFIG_X86_32 /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 276c67bba5a..790af908284 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2034,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) set_page_prot(pmd, PAGE_KERNEL_RO); } +void __init xen_setup_machphys_mapping(void) +{ + struct xen_machphys_mapping mapping; + unsigned long machine_to_phys_nr_ents; + + if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { + machine_to_phys_mapping = (unsigned long *)mapping.v_start; + machine_to_phys_nr_ents = mapping.max_mfn + 1; + } else { + machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; + } + machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); +} + #ifdef CONFIG_X86_64 static void convert_pfn_mfn(void *v) { diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 769c4b01fa3..38fdffaa71d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -248,8 +248,7 @@ char * __init xen_memory_setup(void) else extra_pages = 0; - if (!xen_initial_domain()) - xen_add_extra_mem(extra_pages); + xen_add_extra_mem(extra_pages); return "Xen"; } @@ -337,9 +336,6 @@ void __cpuinit xen_enable_syscall(void) void __init xen_arch_setup(void) { - struct physdev_set_iopl set_iopl; - int rc; - xen_panic_handler_init(); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); @@ -356,11 +352,6 @@ void __init xen_arch_setup(void) xen_enable_sysenter(); xen_enable_syscall(); - set_iopl.iopl = 1; - rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); - if (rc != 0) - printk(KERN_INFO "physdev_op failed %d\n", rc); - #ifdef CONFIG_ACPI if (!(xen_start_info->flags & SIF_INITDOMAIN)) { printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index eb8a78d77d9..533a199e7a3 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -8,9 +8,12 @@ obj-$(CONFIG_BLOCK) += biomerge.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o -obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o +obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o + +xen-evtchn-y := evtchn.o + diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 500290b150b..2b17ad5b4b3 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -50,6 +50,7 @@ #include <asm/pgtable.h> #include <asm/uaccess.h> #include <asm/tlb.h> +#include <asm/e820.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -119,7 +120,7 @@ static void scrub_page(struct page *page) } /* balloon_append: add the given page to the balloon. */ -static void balloon_append(struct page *page) +static void __balloon_append(struct page *page) { /* Lowmem is re-populated first, so highmem pages go at list tail. */ if (PageHighMem(page)) { @@ -130,7 +131,11 @@ static void balloon_append(struct page *page) list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; } +} +static void balloon_append(struct page *page) +{ + __balloon_append(page); totalram_pages--; } @@ -191,7 +196,7 @@ static unsigned long current_target(void) static int increase_reservation(unsigned long nr_pages) { - unsigned long pfn, i, flags; + unsigned long pfn, i; struct page *page; long rc; struct xen_memory_reservation reservation = { @@ -203,8 +208,6 @@ static int increase_reservation(unsigned long nr_pages) if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); - spin_lock_irqsave(&xen_reservation_lock, flags); - page = balloon_first_page(); for (i = 0; i < nr_pages; i++) { BUG_ON(page == NULL); @@ -247,14 +250,12 @@ static int increase_reservation(unsigned long nr_pages) balloon_stats.current_pages += rc; out: - spin_unlock_irqrestore(&xen_reservation_lock, flags); - return rc < 0 ? rc : rc != nr_pages; } static int decrease_reservation(unsigned long nr_pages) { - unsigned long pfn, i, flags; + unsigned long pfn, i; struct page *page; int need_sleep = 0; int ret; @@ -292,8 +293,6 @@ static int decrease_reservation(unsigned long nr_pages) kmap_flush_unused(); flush_tlb_all(); - spin_lock_irqsave(&xen_reservation_lock, flags); - /* No more mappings: invalidate P2M and add to balloon. */ for (i = 0; i < nr_pages; i++) { pfn = mfn_to_pfn(frame_list[i]); @@ -308,8 +307,6 @@ static int decrease_reservation(unsigned long nr_pages) balloon_stats.current_pages -= nr_pages; - spin_unlock_irqrestore(&xen_reservation_lock, flags); - return need_sleep; } @@ -395,7 +392,7 @@ static struct notifier_block xenstore_notifier; static int __init balloon_init(void) { - unsigned long pfn; + unsigned long pfn, extra_pfn_end; struct page *page; if (!xen_pv_domain()) @@ -416,10 +413,15 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); /* Initialise the balloon with excess memory space. */ - for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { + extra_pfn_end = min(e820_end_of_ram_pfn(), + (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size)); + for (pfn = PFN_UP(xen_extra_mem_start); + pfn < extra_pfn_end; + pfn++) { page = pfn_to_page(pfn); - if (!PageReserved(page)) - balloon_append(page); + /* totalram_pages doesn't include the boot-time + balloon extension, so don't subtract from it. */ + __balloon_append(page); } target_watch.callback = watch_target; diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 321a0c8346e..2811bb988ea 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -278,17 +278,17 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); #endif - __clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); - __set_bit(chn, cpu_evtchn_mask(cpu)); + clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); + set_bit(chn, cpu_evtchn_mask(cpu)); irq_info[irq].cpu = cpu; } static void init_evtchn_cpu_bindings(void) { + int i; #ifdef CONFIG_SMP struct irq_desc *desc; - int i; /* By default all event channels notify CPU#0. */ for_each_irq_desc(i, desc) { @@ -296,7 +296,10 @@ static void init_evtchn_cpu_bindings(void) } #endif - memset(cpu_evtchn_mask(0), ~0, sizeof(struct cpu_evtchn_s)); + for_each_possible_cpu(i) + memset(cpu_evtchn_mask(i), + (i == 0) ? ~0 : 0, sizeof(struct cpu_evtchn_s)); + } static inline void clear_evtchn(int port) @@ -752,7 +755,7 @@ int xen_destroy_irq(int irq) goto out; if (xen_initial_domain()) { - unmap_irq.pirq = info->u.pirq.gsi; + unmap_irq.pirq = info->u.pirq.pirq; unmap_irq.domid = DOMID_SELF; rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); if (rc) { diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index fec6ba3c08a..ef11daf0caf 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -69,20 +69,51 @@ struct per_user_data { const char *name; }; -/* Who's bound to each port? */ -static struct per_user_data *port_user[NR_EVENT_CHANNELS]; +/* + * Who's bound to each port? This is logically an array of struct + * per_user_data *, but we encode the current enabled-state in bit 0. + */ +static unsigned long *port_user; static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ -irqreturn_t evtchn_interrupt(int irq, void *data) +static inline struct per_user_data *get_port_user(unsigned port) +{ + return (struct per_user_data *)(port_user[port] & ~1); +} + +static inline void set_port_user(unsigned port, struct per_user_data *u) +{ + port_user[port] = (unsigned long)u; +} + +static inline bool get_port_enabled(unsigned port) +{ + return port_user[port] & 1; +} + +static inline void set_port_enabled(unsigned port, bool enabled) +{ + if (enabled) + port_user[port] |= 1; + else + port_user[port] &= ~1; +} + +static irqreturn_t evtchn_interrupt(int irq, void *data) { unsigned int port = (unsigned long)data; struct per_user_data *u; spin_lock(&port_user_lock); - u = port_user[port]; + u = get_port_user(port); + + WARN(!get_port_enabled(port), + "Interrupt for port %d, but apparently not enabled; per-user %p\n", + port, u); disable_irq_nosync(irq); + set_port_enabled(port, false); if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; @@ -92,9 +123,8 @@ irqreturn_t evtchn_interrupt(int irq, void *data) kill_fasync(&u->evtchn_async_queue, SIGIO, POLL_IN); } - } else { + } else u->ring_overflow = 1; - } spin_unlock(&port_user_lock); @@ -198,9 +228,18 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, goto out; spin_lock_irq(&port_user_lock); - for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) - if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) - enable_irq(irq_from_evtchn(kbuf[i])); + + for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { + unsigned port = kbuf[i]; + + if (port < NR_EVENT_CHANNELS && + get_port_user(port) == u && + !get_port_enabled(port)) { + set_port_enabled(port, true); + enable_irq(irq_from_evtchn(port)); + } + } + spin_unlock_irq(&port_user_lock); rc = count; @@ -222,8 +261,9 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) * interrupt handler yet, and our caller has already * serialized bind operations.) */ - BUG_ON(port_user[port] != NULL); - port_user[port] = u; + BUG_ON(get_port_user(port) != NULL); + set_port_user(port, u); + set_port_enabled(port, true); /* start enabled */ rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, u->name, (void *)(unsigned long)port); @@ -239,10 +279,7 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port) unbind_from_irqhandler(irq, (void *)(unsigned long)port); - /* make sure we unbind the irq handler before clearing the port */ - barrier(); - - port_user[port] = NULL; + set_port_user(port, NULL); } static long evtchn_ioctl(struct file *file, @@ -333,15 +370,17 @@ static long evtchn_ioctl(struct file *file, spin_lock_irq(&port_user_lock); rc = -ENOTCONN; - if (port_user[unbind.port] != u) { + if (get_port_user(unbind.port) != u) { spin_unlock_irq(&port_user_lock); break; } - evtchn_unbind_from_user(u, unbind.port); + disable_irq(irq_from_evtchn(unbind.port)); spin_unlock_irq(&port_user_lock); + evtchn_unbind_from_user(u, unbind.port); + rc = 0; break; } @@ -355,7 +394,7 @@ static long evtchn_ioctl(struct file *file, if (notify.port >= NR_EVENT_CHANNELS) { rc = -EINVAL; - } else if (port_user[notify.port] != u) { + } else if (get_port_user(notify.port) != u) { rc = -ENOTCONN; } else { notify_remote_via_evtchn(notify.port); @@ -431,7 +470,7 @@ static int evtchn_open(struct inode *inode, struct file *filp) filp->private_data = u; - return 0; + return nonseekable_open(inode, filp);; } static int evtchn_release(struct inode *inode, struct file *filp) @@ -444,14 +483,21 @@ static int evtchn_release(struct inode *inode, struct file *filp) free_page((unsigned long)u->ring); for (i = 0; i < NR_EVENT_CHANNELS; i++) { - if (port_user[i] != u) + if (get_port_user(i) != u) continue; - evtchn_unbind_from_user(port_user[i], i); + disable_irq(irq_from_evtchn(i)); } spin_unlock_irq(&port_user_lock); + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + if (get_port_user(i) != u) + continue; + + evtchn_unbind_from_user(get_port_user(i), i); + } + kfree(u->name); kfree(u); @@ -467,12 +513,12 @@ static const struct file_operations evtchn_fops = { .fasync = evtchn_fasync, .open = evtchn_open, .release = evtchn_release, - .llseek = noop_llseek, + .llseek = no_llseek, }; static struct miscdevice evtchn_miscdev = { .minor = MISC_DYNAMIC_MINOR, - .name = "evtchn", + .name = "xen/evtchn", .fops = &evtchn_fops, }; static int __init evtchn_init(void) @@ -482,8 +528,11 @@ static int __init evtchn_init(void) if (!xen_domain()) return -ENODEV; + port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL); + if (port_user == NULL) + return -ENOMEM; + spin_lock_init(&port_user_lock); - memset(port_user, 0, sizeof(port_user)); /* Create '/dev/misc/evtchn'. */ err = misc_register(&evtchn_miscdev); @@ -499,6 +548,9 @@ static int __init evtchn_init(void) static void __exit evtchn_cleanup(void) { + kfree(port_user); + port_user = NULL; + misc_deregister(&evtchn_miscdev); } diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index d7a6c13bde6..eac3ce15371 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -141,6 +141,19 @@ struct xen_machphys_mfn_list { DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); /* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + unsigned long v_start, v_end; /* Start and end virtual addresses. */ + unsigned long max_mfn; /* Maximum MFN that can be looked up. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); + +/* * Sets the GPFN at which a particular page appears in the specified guest's * pseudophysical address space. * arg == addr of xen_add_to_physmap_t. diff --git a/include/xen/page.h b/include/xen/page.h index eaf85fab126..0be36b976f4 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -1 +1,8 @@ +#ifndef _XEN_PAGE_H +#define _XEN_PAGE_H + #include <asm/xen/page.h> + +extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size; + +#endif /* _XEN_PAGE_H */ |