diff options
Diffstat (limited to 'drivers/xen')
-rw-r--r-- | drivers/xen/Kconfig | 2 | ||||
-rw-r--r-- | drivers/xen/Makefile | 5 | ||||
-rw-r--r-- | drivers/xen/acpi.c | 41 | ||||
-rw-r--r-- | drivers/xen/balloon.c | 74 | ||||
-rw-r--r-- | drivers/xen/events.c | 43 | ||||
-rw-r--r-- | drivers/xen/evtchn.c | 210 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 11 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 13 | ||||
-rw-r--r-- | drivers/xen/privcmd.c | 83 | ||||
-rw-r--r-- | drivers/xen/swiotlb-xen.c | 8 | ||||
-rw-r--r-- | drivers/xen/xen-selfballoon.c | 54 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_frontend.c | 19 |
12 files changed, 382 insertions, 181 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 9e02d60a364..23eae5cb69c 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -145,7 +145,7 @@ config SWIOTLB_XEN config XEN_TMEM tristate - depends on !ARM + depends on !ARM && !ARM64 default m if (CLEANCACHE || FRONTSWAP) help Shim to interface in-kernel Transcendent Memory hooks diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index eabd0ee1c2b..14fe79d8634 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,9 +1,8 @@ -ifneq ($(CONFIG_ARM),y) -obj-y += manage.o +ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),) obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o endif obj-$(CONFIG_X86) += fallback.o -obj-y += grant-table.o features.o events.o balloon.o +obj-y += grant-table.o features.o events.o balloon.o manage.o obj-y += xenbus/ nostackp := $(call cc-option, -fno-stack-protector) diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c index 119d42a2bf5..90307c0b630 100644 --- a/drivers/xen/acpi.c +++ b/drivers/xen/acpi.c @@ -35,28 +35,43 @@ #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> -int xen_acpi_notify_hypervisor_state(u8 sleep_state, - u32 pm1a_cnt, u32 pm1b_cnt) +static int xen_acpi_notify_hypervisor_state(u8 sleep_state, + u32 val_a, u32 val_b, + bool extended) { + unsigned int bits = extended ? 8 : 16; + struct xen_platform_op op = { .cmd = XENPF_enter_acpi_sleep, .interface_version = XENPF_INTERFACE_VERSION, - .u = { - .enter_acpi_sleep = { - .pm1a_cnt_val = (u16)pm1a_cnt, - .pm1b_cnt_val = (u16)pm1b_cnt, - .sleep_state = sleep_state, - }, + .u.enter_acpi_sleep = { + .val_a = (u16)val_a, + .val_b = (u16)val_b, + .sleep_state = sleep_state, + .flags = extended ? XENPF_ACPI_SLEEP_EXTENDED : 0, }, }; - if ((pm1a_cnt & 0xffff0000) || (pm1b_cnt & 0xffff0000)) { - WARN(1, "Using more than 16bits of PM1A/B 0x%x/0x%x!" - "Email xen-devel@lists.xensource.com Thank you.\n", \ - pm1a_cnt, pm1b_cnt); + if (WARN((val_a & (~0 << bits)) || (val_b & (~0 << bits)), + "Using more than %u bits of sleep control values %#x/%#x!" + "Email xen-devel@lists.xen.org - Thank you.\n", \ + bits, val_a, val_b)) return -1; - } HYPERVISOR_dom0_op(&op); return 1; } + +int xen_acpi_notify_hypervisor_sleep(u8 sleep_state, + u32 pm1a_cnt, u32 pm1b_cnt) +{ + return xen_acpi_notify_hypervisor_state(sleep_state, pm1a_cnt, + pm1b_cnt, false); +} + +int xen_acpi_notify_hypervisor_extended_sleep(u8 sleep_state, + u32 val_a, u32 val_b) +{ + return xen_acpi_notify_hypervisor_state(sleep_state, val_a, + val_b, true); +} diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 2a2ef97697b..3101cf6daf5 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -38,6 +38,7 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt +#include <linux/cpu.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/errno.h> @@ -52,6 +53,7 @@ #include <linux/notifier.h> #include <linux/memory.h> #include <linux/memory_hotplug.h> +#include <linux/percpu-defs.h> #include <asm/page.h> #include <asm/pgalloc.h> @@ -90,6 +92,8 @@ EXPORT_SYMBOL_GPL(balloon_stats); /* We increase/decrease in batches which fit in a page */ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; +static DEFINE_PER_CPU(struct page *, balloon_scratch_page); + /* List of ballooned pages, threaded through the mem_map array. */ static LIST_HEAD(ballooned_pages); @@ -412,7 +416,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) if (xen_pv_domain() && !PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), - __pte_ma(0), 0); + pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)), + PAGE_KERNEL_RO), 0); BUG_ON(ret); } #endif @@ -425,7 +430,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) /* No more mappings: invalidate P2M and add to balloon. */ for (i = 0; i < nr_pages; i++) { pfn = mfn_to_pfn(frame_list[i]); - __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long p; + struct page *pg; + pg = __get_cpu_var(balloon_scratch_page); + p = page_to_pfn(pg); + __set_phys_to_machine(pfn, pfn_to_mfn(p)); + } balloon_append(pfn_to_page(pfn)); } @@ -480,6 +491,18 @@ static void balloon_process(struct work_struct *work) mutex_unlock(&balloon_mutex); } +struct page *get_balloon_scratch_page(void) +{ + struct page *ret = get_cpu_var(balloon_scratch_page); + BUG_ON(ret == NULL); + return ret; +} + +void put_balloon_scratch_page(void) +{ + put_cpu_var(balloon_scratch_page); +} + /* Resets the Xen limit, sets new target, and kicks off processing. */ void balloon_set_new_target(unsigned long target) { @@ -573,13 +596,47 @@ static void __init balloon_add_region(unsigned long start_pfn, } } +static int __cpuinit balloon_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + switch (action) { + case CPU_UP_PREPARE: + if (per_cpu(balloon_scratch_page, cpu) != NULL) + break; + per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); + if (per_cpu(balloon_scratch_page, cpu) == NULL) { + pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); + return NOTIFY_BAD; + } + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block balloon_cpu_notifier __cpuinitdata = { + .notifier_call = balloon_cpu_notify, +}; + static int __init balloon_init(void) { - int i; + int i, cpu; if (!xen_domain()) return -ENODEV; + for_each_online_cpu(cpu) + { + per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); + if (per_cpu(balloon_scratch_page, cpu) == NULL) { + pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); + return -ENOMEM; + } + } + register_cpu_notifier(&balloon_cpu_notifier); + pr_info("Initialising balloon driver\n"); balloon_stats.current_pages = xen_pv_domain() @@ -616,4 +673,15 @@ static int __init balloon_init(void) subsys_initcall(balloon_init); +static int __init balloon_clear(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(balloon_scratch_page, cpu) = NULL; + + return 0; +} +early_initcall(balloon_clear); + MODULE_LICENSE("GPL"); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index a58ac435a9a..4035e833ea2 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -56,6 +56,7 @@ #include <xen/interface/hvm/params.h> #include <xen/interface/physdev.h> #include <xen/interface/sched.h> +#include <xen/interface/vcpu.h> #include <asm/hw_irq.h> /* @@ -348,7 +349,7 @@ static void init_evtchn_cpu_bindings(void) for_each_possible_cpu(i) memset(per_cpu(cpu_evtchn_mask, i), - (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i))); + (i == 0) ? ~0 : 0, NR_EVENT_CHANNELS/8); } static inline void clear_evtchn(int port) @@ -1212,7 +1213,17 @@ EXPORT_SYMBOL_GPL(evtchn_put); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) { - int irq = per_cpu(ipi_to_irq, cpu)[vector]; + int irq; + +#ifdef CONFIG_X86 + if (unlikely(vector == XEN_NMI_VECTOR)) { + int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL); + if (rc < 0) + printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc); + return; + } +#endif + irq = per_cpu(ipi_to_irq, cpu)[vector]; BUG_ON(irq < 0); notify_remote_via_irq(irq); } @@ -1379,14 +1390,21 @@ static void __xen_evtchn_do_upcall(void) pending_bits = active_evtchns(cpu, s, word_idx); bit_idx = 0; /* usually scan entire word from start */ + /* + * We scan the starting word in two parts. + * + * 1st time: start in the middle, scanning the + * upper bits. + * + * 2nd time: scan the whole word (not just the + * parts skipped in the first pass) -- if an + * event in the previously scanned bits is + * pending again it would just be scanned on + * the next loop anyway. + */ if (word_idx == start_word_idx) { - /* We scan the starting word in two parts */ if (i == 0) - /* 1st time: start in the middle */ bit_idx = start_bit_idx; - else - /* 2nd time: mask bits done already */ - bit_idx &= (1UL << start_bit_idx) - 1; } do { @@ -1493,8 +1511,10 @@ void rebind_evtchn_irq(int evtchn, int irq) /* Rebind an evtchn so that it gets delivered to a specific cpu */ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { + struct shared_info *s = HYPERVISOR_shared_info; struct evtchn_bind_vcpu bind_vcpu; int evtchn = evtchn_from_irq(irq); + int masked; if (!VALID_EVTCHN(evtchn)) return -1; @@ -1511,6 +1531,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) bind_vcpu.vcpu = tcpu; /* + * Mask the event while changing the VCPU binding to prevent + * it being delivered on an unexpected VCPU. + */ + masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask)); + + /* * If this fails, it usually just indicates that we're dealing with a * virq or IPI channel, which don't actually need to be rebound. Ignore * it, but don't do the xenlinux-level rebind in that case. @@ -1518,6 +1544,9 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) bind_evtchn_to_cpu(evtchn, tcpu); + if (!masked) + unmask_evtchn(evtchn); + return 0; } diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 8feecf01d55..8b3a69a06c3 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -57,6 +57,7 @@ struct per_user_data { struct mutex bind_mutex; /* serialize bind/unbind operations */ + struct rb_root evtchns; /* Notification ring, accessed via /dev/xen/evtchn. */ #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) @@ -64,6 +65,7 @@ struct per_user_data { evtchn_port_t *ring; unsigned int ring_cons, ring_prod, ring_overflow; struct mutex ring_cons_mutex; /* protect against concurrent readers */ + spinlock_t ring_prod_lock; /* product against concurrent interrupts */ /* Processes wait on this queue when ring is empty. */ wait_queue_head_t evtchn_wait; @@ -71,54 +73,79 @@ struct per_user_data { const char *name; }; -/* - * Who's bound to each port? This is logically an array of struct - * per_user_data *, but we encode the current enabled-state in bit 0. - */ -static unsigned long *port_user; -static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ +struct user_evtchn { + struct rb_node node; + struct per_user_data *user; + unsigned port; + bool enabled; +}; -static inline struct per_user_data *get_port_user(unsigned port) +static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) { - return (struct per_user_data *)(port_user[port] & ~1); -} + struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL; -static inline void set_port_user(unsigned port, struct per_user_data *u) -{ - port_user[port] = (unsigned long)u; + while (*new) { + struct user_evtchn *this; + + this = container_of(*new, struct user_evtchn, node); + + parent = *new; + if (this->port < evtchn->port) + new = &((*new)->rb_left); + else if (this->port > evtchn->port) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&evtchn->node, parent, new); + rb_insert_color(&evtchn->node, &u->evtchns); + + return 0; } -static inline bool get_port_enabled(unsigned port) +static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) { - return port_user[port] & 1; + rb_erase(&evtchn->node, &u->evtchns); + kfree(evtchn); } -static inline void set_port_enabled(unsigned port, bool enabled) +static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port) { - if (enabled) - port_user[port] |= 1; - else - port_user[port] &= ~1; + struct rb_node *node = u->evtchns.rb_node; + + while (node) { + struct user_evtchn *evtchn; + + evtchn = container_of(node, struct user_evtchn, node); + + if (evtchn->port < port) + node = node->rb_left; + else if (evtchn->port > port) + node = node->rb_right; + else + return evtchn; + } + return NULL; } static irqreturn_t evtchn_interrupt(int irq, void *data) { - unsigned int port = (unsigned long)data; - struct per_user_data *u; + struct user_evtchn *evtchn = data; + struct per_user_data *u = evtchn->user; - spin_lock(&port_user_lock); - - u = get_port_user(port); - - WARN(!get_port_enabled(port), + WARN(!evtchn->enabled, "Interrupt for port %d, but apparently not enabled; per-user %p\n", - port, u); + evtchn->port, u); disable_irq_nosync(irq); - set_port_enabled(port, false); + evtchn->enabled = false; + + spin_lock(&u->ring_prod_lock); if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { - u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; + u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port; wmb(); /* Ensure ring contents visible */ if (u->ring_cons == u->ring_prod++) { wake_up_interruptible(&u->evtchn_wait); @@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) } else u->ring_overflow = 1; - spin_unlock(&port_user_lock); + spin_unlock(&u->ring_prod_lock); return IRQ_HANDLED; } @@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, if (copy_from_user(kbuf, buf, count) != 0) goto out; - spin_lock_irq(&port_user_lock); + mutex_lock(&u->bind_mutex); for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { unsigned port = kbuf[i]; + struct user_evtchn *evtchn; - if (port < NR_EVENT_CHANNELS && - get_port_user(port) == u && - !get_port_enabled(port)) { - set_port_enabled(port, true); + evtchn = find_evtchn(u, port); + if (evtchn && !evtchn->enabled) { + evtchn->enabled = true; enable_irq(irq_from_evtchn(port)); } } - spin_unlock_irq(&port_user_lock); + mutex_unlock(&u->bind_mutex); rc = count; @@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, static int evtchn_bind_to_user(struct per_user_data *u, int port) { + struct user_evtchn *evtchn; + struct evtchn_close close; int rc = 0; /* @@ -263,35 +292,46 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) * interrupt handler yet, and our caller has already * serialized bind operations.) */ - BUG_ON(get_port_user(port) != NULL); - set_port_user(port, u); - set_port_enabled(port, true); /* start enabled */ + + evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL); + if (!evtchn) + return -ENOMEM; + + evtchn->user = u; + evtchn->port = port; + evtchn->enabled = true; /* start enabled */ + + rc = add_evtchn(u, evtchn); + if (rc < 0) + goto err; rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, - u->name, (void *)(unsigned long)port); - if (rc >= 0) - rc = evtchn_make_refcounted(port); - else { - /* bind failed, should close the port now */ - struct evtchn_close close; - close.port = port; - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) - BUG(); - set_port_user(port, NULL); - } + u->name, evtchn); + if (rc < 0) + goto err; + + rc = evtchn_make_refcounted(port); + return rc; +err: + /* bind failed, should close the port now */ + close.port = port; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); + del_evtchn(u, evtchn); return rc; } -static void evtchn_unbind_from_user(struct per_user_data *u, int port) +static void evtchn_unbind_from_user(struct per_user_data *u, + struct user_evtchn *evtchn) { - int irq = irq_from_evtchn(port); + int irq = irq_from_evtchn(evtchn->port); BUG_ON(irq < 0); - unbind_from_irqhandler(irq, (void *)(unsigned long)port); + unbind_from_irqhandler(irq, evtchn); - set_port_user(port, NULL); + del_evtchn(u, evtchn); } static long evtchn_ioctl(struct file *file, @@ -370,6 +410,7 @@ static long evtchn_ioctl(struct file *file, case IOCTL_EVTCHN_UNBIND: { struct ioctl_evtchn_unbind unbind; + struct user_evtchn *evtchn; rc = -EFAULT; if (copy_from_user(&unbind, uarg, sizeof(unbind))) @@ -379,36 +420,28 @@ static long evtchn_ioctl(struct file *file, if (unbind.port >= NR_EVENT_CHANNELS) break; - spin_lock_irq(&port_user_lock); - rc = -ENOTCONN; - if (get_port_user(unbind.port) != u) { - spin_unlock_irq(&port_user_lock); + evtchn = find_evtchn(u, unbind.port); + if (!evtchn) break; - } disable_irq(irq_from_evtchn(unbind.port)); - - spin_unlock_irq(&port_user_lock); - - evtchn_unbind_from_user(u, unbind.port); - + evtchn_unbind_from_user(u, evtchn); rc = 0; break; } case IOCTL_EVTCHN_NOTIFY: { struct ioctl_evtchn_notify notify; + struct user_evtchn *evtchn; rc = -EFAULT; if (copy_from_user(¬ify, uarg, sizeof(notify))) break; - if (notify.port >= NR_EVENT_CHANNELS) { - rc = -EINVAL; - } else if (get_port_user(notify.port) != u) { - rc = -ENOTCONN; - } else { + rc = -ENOTCONN; + evtchn = find_evtchn(u, notify.port); + if (evtchn) { notify_remote_via_evtchn(notify.port); rc = 0; } @@ -418,9 +451,9 @@ static long evtchn_ioctl(struct file *file, case IOCTL_EVTCHN_RESET: { /* Initialise the ring to empty. Clear errors. */ mutex_lock(&u->ring_cons_mutex); - spin_lock_irq(&port_user_lock); + spin_lock_irq(&u->ring_prod_lock); u->ring_cons = u->ring_prod = u->ring_overflow = 0; - spin_unlock_irq(&port_user_lock); + spin_unlock_irq(&u->ring_prod_lock); mutex_unlock(&u->ring_cons_mutex); rc = 0; break; @@ -479,6 +512,7 @@ static int evtchn_open(struct inode *inode, struct file *filp) mutex_init(&u->bind_mutex); mutex_init(&u->ring_cons_mutex); + spin_lock_init(&u->ring_prod_lock); filp->private_data = u; @@ -487,29 +521,18 @@ static int evtchn_open(struct inode *inode, struct file *filp) static int evtchn_release(struct inode *inode, struct file *filp) { - int i; struct per_user_data *u = filp->private_data; + struct rb_node *node; - spin_lock_irq(&port_user_lock); - - free_page((unsigned long)u->ring); - - for (i = 0; i < NR_EVENT_CHANNELS; i++) { - if (get_port_user(i) != u) - continue; - - disable_irq(irq_from_evtchn(i)); - } - - spin_unlock_irq(&port_user_lock); - - for (i = 0; i < NR_EVENT_CHANNELS; i++) { - if (get_port_user(i) != u) - continue; + while ((node = u->evtchns.rb_node)) { + struct user_evtchn *evtchn; - evtchn_unbind_from_user(get_port_user(i), i); + evtchn = rb_entry(node, struct user_evtchn, node); + disable_irq(irq_from_evtchn(evtchn->port)); + evtchn_unbind_from_user(u, evtchn); } + free_page((unsigned long)u->ring); kfree(u->name); kfree(u); @@ -540,12 +563,6 @@ static int __init evtchn_init(void) if (!xen_domain()) return -ENODEV; - port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL); - if (port_user == NULL) - return -ENOMEM; - - spin_lock_init(&port_user_lock); - /* Create '/dev/xen/evtchn'. */ err = misc_register(&evtchn_miscdev); if (err != 0) { @@ -560,9 +577,6 @@ static int __init evtchn_init(void) static void __exit evtchn_cleanup(void) { - kfree(port_user); - port_user = NULL; - misc_deregister(&evtchn_miscdev); } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index eab5427c75f..e41c79c986e 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -272,19 +272,12 @@ static int map_grant_pages(struct grant_map *map) * with find_grant_ptes. */ for (i = 0; i < map->count; i++) { - unsigned level; unsigned long address = (unsigned long) pfn_to_kaddr(page_to_pfn(map->pages[i])); - pte_t *ptep; - u64 pte_maddr = 0; BUG_ON(PageHighMem(map->pages[i])); - ptep = lookup_address(address, &level); - pte_maddr = arbitrary_virt_to_machine(ptep).maddr; - gnttab_set_map_op(&map->kmap_ops[i], pte_maddr, - map->flags | - GNTMAP_host_map | - GNTMAP_contains_pte, + gnttab_set_map_op(&map->kmap_ops[i], address, + map->flags | GNTMAP_host_map, map->grants[i].ref, map->grants[i].domid); } diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 04cdeb8e371..c4d2298893b 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -730,9 +730,18 @@ void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, u16 count) { unsigned long flags; + struct gnttab_free_callback *cb; + spin_lock_irqsave(&gnttab_list_lock, flags); - if (callback->next) - goto out; + + /* Check if the callback is already on the list */ + cb = gnttab_free_callback_list; + while (cb) { + if (cb == callback) + goto out; + cb = cb->next; + } + callback->fn = fn; callback->arg = arg; callback->count = count; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index f8e5dd701ec..8e74590fa1b 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -43,9 +43,10 @@ MODULE_LICENSE("GPL"); #define PRIV_VMA_LOCKED ((void *)1) -#ifndef HAVE_ARCH_PRIVCMD_MMAP -static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); -#endif +static int privcmd_vma_range_is_mapped( + struct vm_area_struct *vma, + unsigned long addr, + unsigned long nr_pages); static long privcmd_ioctl_hypercall(void __user *udata) { @@ -225,9 +226,9 @@ static long privcmd_ioctl_mmap(void __user *udata) vma = find_vma(mm, msg->va); rc = -EINVAL; - if (!vma || (msg->va != vma->vm_start) || - !privcmd_enforce_singleshot_mapping(vma)) + if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data) goto out_up; + vma->vm_private_data = PRIV_VMA_LOCKED; } state.va = vma->vm_start; @@ -358,7 +359,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) kfree(pages); return -ENOMEM; } - BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED); + BUG_ON(vma->vm_private_data != NULL); vma->vm_private_data = pages; return 0; @@ -421,19 +422,43 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) vma = find_vma(mm, m.addr); if (!vma || - vma->vm_ops != &privcmd_vm_ops || - (m.addr != vma->vm_start) || - ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || - !privcmd_enforce_singleshot_mapping(vma)) { - up_write(&mm->mmap_sem); + vma->vm_ops != &privcmd_vm_ops) { ret = -EINVAL; - goto out; + goto out_unlock; } - if (xen_feature(XENFEAT_auto_translated_physmap)) { - ret = alloc_empty_pages(vma, m.num); - if (ret < 0) { - up_write(&mm->mmap_sem); - goto out; + + /* + * Caller must either: + * + * Map the whole VMA range, which will also allocate all the + * pages required for the auto_translated_physmap case. + * + * Or + * + * Map unmapped holes left from a previous map attempt (e.g., + * because those foreign frames were previously paged out). + */ + if (vma->vm_private_data == NULL) { + if (m.addr != vma->vm_start || + m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) { + ret = -EINVAL; + goto out_unlock; + } + if (xen_feature(XENFEAT_auto_translated_physmap)) { + ret = alloc_empty_pages(vma, m.num); + if (ret < 0) + goto out_unlock; + } else + vma->vm_private_data = PRIV_VMA_LOCKED; + } else { + if (m.addr < vma->vm_start || + m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) { + ret = -EINVAL; + goto out_unlock; + } + if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) { + ret = -EINVAL; + goto out_unlock; } } @@ -466,8 +491,11 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) out: free_page_list(&pagelist); - return ret; + +out_unlock: + up_write(&mm->mmap_sem); + goto out; } static long privcmd_ioctl(struct file *file, @@ -540,9 +568,24 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) +/* + * For MMAPBATCH*. This allows asserting the singleshot mapping + * on a per pfn/pte basis. Mapping calls that fail with ENOENT + * can be then retried until success. + */ +static int is_mapped_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + return pte_none(*pte) ? 0 : -EBUSY; +} + +static int privcmd_vma_range_is_mapped( + struct vm_area_struct *vma, + unsigned long addr, + unsigned long nr_pages) { - return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED); + return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT, + is_mapped_fn, NULL) != 0; } const struct file_operations xen_privcmd_fops = { diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index aadffcf7db9..1b2277c311d 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -506,13 +506,13 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, to do proper error handling. */ xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, attrs); - sgl[0].dma_length = 0; + sg_dma_len(sgl) = 0; return DMA_ERROR_CODE; } sg->dma_address = xen_phys_to_bus(map); } else sg->dma_address = dev_addr; - sg->dma_length = sg->length; + sg_dma_len(sg) = sg->length; } return nelems; } @@ -533,7 +533,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) - xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); + xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir); } EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); @@ -555,7 +555,7 @@ xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, for_each_sg(sgl, sg, nelems, i) xen_swiotlb_sync_single(hwdev, sg->dma_address, - sg->dma_length, dir, target); + sg_dma_len(sg), dir, target); } void diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 02817a85f87..21e18c18c7a 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -265,8 +265,10 @@ static ssize_t store_selfballooning(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &tmp); - if (err || ((tmp != 0) && (tmp != 1))) + err = kstrtoul(buf, 10, &tmp); + if (err) + return err; + if ((tmp != 0) && (tmp != 1)) return -EINVAL; xen_selfballooning_enabled = !!tmp; @@ -292,8 +294,10 @@ static ssize_t store_selfballoon_interval(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; selfballoon_interval = val; return count; @@ -314,8 +318,10 @@ static ssize_t store_selfballoon_downhys(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; selfballoon_downhysteresis = val; return count; @@ -337,8 +343,10 @@ static ssize_t store_selfballoon_uphys(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; selfballoon_uphysteresis = val; return count; @@ -360,8 +368,10 @@ static ssize_t store_selfballoon_min_usable_mb(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; selfballoon_min_usable_mb = val; return count; @@ -384,8 +394,10 @@ static ssize_t store_selfballoon_reserved_mb(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; selfballoon_reserved_mb = val; return count; @@ -410,8 +422,10 @@ static ssize_t store_frontswap_selfshrinking(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &tmp); - if (err || ((tmp != 0) && (tmp != 1))) + err = kstrtoul(buf, 10, &tmp); + if (err) + return err; + if ((tmp != 0) && (tmp != 1)) return -EINVAL; frontswap_selfshrinking = !!tmp; if (!was_enabled && !xen_selfballooning_enabled && @@ -437,8 +451,10 @@ static ssize_t store_frontswap_inertia(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; frontswap_inertia = val; frontswap_inertia_counter = val; @@ -460,8 +476,10 @@ static ssize_t store_frontswap_hysteresis(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; frontswap_hysteresis = val; return count; diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 6ed8a9df447..34b20bfa4e8 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -115,7 +115,6 @@ static int xenbus_frontend_dev_resume(struct device *dev) return -EFAULT; } - INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume); queue_work(xenbus_frontend_wq, &xdev->work); return 0; @@ -124,6 +123,16 @@ static int xenbus_frontend_dev_resume(struct device *dev) return xenbus_dev_resume(dev); } +static int xenbus_frontend_dev_probe(struct device *dev) +{ + if (xen_store_domain_type == XS_LOCAL) { + struct xenbus_device *xdev = to_xenbus_device(dev); + INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume); + } + + return xenbus_dev_probe(dev); +} + static const struct dev_pm_ops xenbus_pm_ops = { .suspend = xenbus_dev_suspend, .resume = xenbus_frontend_dev_resume, @@ -142,7 +151,7 @@ static struct xen_bus_type xenbus_frontend = { .name = "xen", .match = xenbus_match, .uevent = xenbus_uevent_frontend, - .probe = xenbus_dev_probe, + .probe = xenbus_frontend_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, .dev_attrs = xenbus_dev_attrs, @@ -474,7 +483,11 @@ static int __init xenbus_probe_frontend_init(void) register_xenstore_notifier(&xenstore_notifier); - xenbus_frontend_wq = create_workqueue("xenbus_frontend"); + if (xen_store_domain_type == XS_LOCAL) { + xenbus_frontend_wq = create_workqueue("xenbus_frontend"); + if (!xenbus_frontend_wq) + pr_warn("create xenbus frontend workqueue failed, S3 resume is likely to fail\n"); + } return 0; } |