summary | refs | log | tree | commit | diff | stats
path: root/drivers/xen
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/xen')
-rw-r--r-- drivers/xen/Kconfig 2
-rw-r--r-- drivers/xen/Makefile 5
-rw-r--r-- drivers/xen/acpi.c 41
-rw-r--r-- drivers/xen/balloon.c 74
-rw-r--r-- drivers/xen/events.c 43
-rw-r--r-- drivers/xen/evtchn.c 210
-rw-r--r-- drivers/xen/gntdev.c 11
-rw-r--r-- drivers/xen/grant-table.c 13
-rw-r--r-- drivers/xen/privcmd.c 83
-rw-r--r-- drivers/xen/swiotlb-xen.c 8
-rw-r--r-- drivers/xen/xen-selfballoon.c 54
-rw-r--r-- drivers/xen/xenbus/xenbus_probe_frontend.c 19
12 files changed, 382 insertions, 181 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 9e02d60a364..23eae5cb69c 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -145,7 +145,7 @@ config SWIOTLB_XEN
config XEN_TMEM
tristate
- depends on !ARM
+ depends on !ARM && !ARM64
default m if (CLEANCACHE || FRONTSWAP)
help
Shim to interface in-kernel Transcendent Memory hooks
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index eabd0ee1c2b..14fe79d8634 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,9 +1,8 @@
-ifneq ($(CONFIG_ARM),y)
-obj-y += manage.o
+ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),)
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
endif
obj-$(CONFIG_X86) += fallback.o
-obj-y += grant-table.o features.o events.o balloon.o
+obj-y += grant-table.o features.o events.o balloon.o manage.o
obj-y += xenbus/
nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c
index 119d42a2bf5..90307c0b630 100644
--- a/drivers/xen/acpi.c
+++ b/drivers/xen/acpi.c
@@ -35,28 +35,43 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
-int xen_acpi_notify_hypervisor_state(u8 sleep_state,
- u32 pm1a_cnt, u32 pm1b_cnt)
+static int xen_acpi_notify_hypervisor_state(u8 sleep_state,
+ u32 val_a, u32 val_b,
+ bool extended)
{
+ unsigned int bits = extended ? 8 : 16;
+
struct xen_platform_op op = {
.cmd = XENPF_enter_acpi_sleep,
.interface_version = XENPF_INTERFACE_VERSION,
- .u = {
- .enter_acpi_sleep = {
- .pm1a_cnt_val = (u16)pm1a_cnt,
- .pm1b_cnt_val = (u16)pm1b_cnt,
- .sleep_state = sleep_state,
- },
+ .u.enter_acpi_sleep = {
+ .val_a = (u16)val_a,
+ .val_b = (u16)val_b,
+ .sleep_state = sleep_state,
+ .flags = extended ? XENPF_ACPI_SLEEP_EXTENDED : 0,
},
};
- if ((pm1a_cnt & 0xffff0000) || (pm1b_cnt & 0xffff0000)) {
- WARN(1, "Using more than 16bits of PM1A/B 0x%x/0x%x!"
- "Email xen-devel@lists.xensource.com Thank you.\n", \
- pm1a_cnt, pm1b_cnt);
+ if (WARN((val_a & (~0 << bits)) || (val_b & (~0 << bits)),
+ "Using more than %u bits of sleep control values %#x/%#x!"
+ "Email xen-devel@lists.xen.org - Thank you.\n", \
+ bits, val_a, val_b))
return -1;
- }
HYPERVISOR_dom0_op(&op);
return 1;
}
+
+int xen_acpi_notify_hypervisor_sleep(u8 sleep_state,
+ u32 pm1a_cnt, u32 pm1b_cnt)
+{
+ return xen_acpi_notify_hypervisor_state(sleep_state, pm1a_cnt,
+ pm1b_cnt, false);
+}
+
+int xen_acpi_notify_hypervisor_extended_sleep(u8 sleep_state,
+ u32 val_a, u32 val_b)
+{
+ return xen_acpi_notify_hypervisor_state(sleep_state, val_a,
+ val_b, true);
+}
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 2a2ef97697b..3101cf6daf5 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -38,6 +38,7 @@
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
@@ -52,6 +53,7 @@
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
+#include <linux/percpu-defs.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -90,6 +92,8 @@ EXPORT_SYMBOL_GPL(balloon_stats);
/* We increase/decrease in batches which fit in a page */
static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
+static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
+
/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);
@@ -412,7 +416,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
if (xen_pv_domain() && !PageHighMem(page)) {
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
- __pte_ma(0), 0);
+ pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)),
+ PAGE_KERNEL_RO), 0);
BUG_ON(ret);
}
#endif
@@ -425,7 +430,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
/* No more mappings: invalidate P2M and add to balloon. */
for (i = 0; i < nr_pages; i++) {
pfn = mfn_to_pfn(frame_list[i]);
- __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ unsigned long p;
+ struct page *pg;
+ pg = __get_cpu_var(balloon_scratch_page);
+ p = page_to_pfn(pg);
+ __set_phys_to_machine(pfn, pfn_to_mfn(p));
+ }
balloon_append(pfn_to_page(pfn));
}
@@ -480,6 +491,18 @@ static void balloon_process(struct work_struct *work)
mutex_unlock(&balloon_mutex);
}
+struct page *get_balloon_scratch_page(void)
+{
+ struct page *ret = get_cpu_var(balloon_scratch_page);
+ BUG_ON(ret == NULL);
+ return ret;
+}
+
+void put_balloon_scratch_page(void)
+{
+ put_cpu_var(balloon_scratch_page);
+}
+
/* Resets the Xen limit, sets new target, and kicks off processing. */
void balloon_set_new_target(unsigned long target)
{
@@ -573,13 +596,47 @@ static void __init balloon_add_region(unsigned long start_pfn,
}
}
+static int __cpuinit balloon_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (long)hcpu;
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (per_cpu(balloon_scratch_page, cpu) != NULL)
+ break;
+ per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+ if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+ pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+ return NOTIFY_BAD;
+ }
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block balloon_cpu_notifier __cpuinitdata = {
+ .notifier_call = balloon_cpu_notify,
+};
+
static int __init balloon_init(void)
{
- int i;
+ int i, cpu;
if (!xen_domain())
return -ENODEV;
+ for_each_online_cpu(cpu)
+ {
+ per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+ if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+ pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+ return -ENOMEM;
+ }
+ }
+ register_cpu_notifier(&balloon_cpu_notifier);
+
pr_info("Initialising balloon driver\n");
balloon_stats.current_pages = xen_pv_domain()
@@ -616,4 +673,15 @@ static int __init balloon_init(void)
subsys_initcall(balloon_init);
+static int __init balloon_clear(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(balloon_scratch_page, cpu) = NULL;
+
+ return 0;
+}
+early_initcall(balloon_clear);
+
MODULE_LICENSE("GPL");
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index a58ac435a9a..4035e833ea2 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -56,6 +56,7 @@
#include <xen/interface/hvm/params.h>
#include <xen/interface/physdev.h>
#include <xen/interface/sched.h>
+#include <xen/interface/vcpu.h>
#include <asm/hw_irq.h>
/*
@@ -348,7 +349,7 @@ static void init_evtchn_cpu_bindings(void)
for_each_possible_cpu(i)
memset(per_cpu(cpu_evtchn_mask, i),
- (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i)));
+ (i == 0) ? ~0 : 0, NR_EVENT_CHANNELS/8);
}
static inline void clear_evtchn(int port)
@@ -1212,7 +1213,17 @@ EXPORT_SYMBOL_GPL(evtchn_put);
void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
{
- int irq = per_cpu(ipi_to_irq, cpu)[vector];
+ int irq;
+
+#ifdef CONFIG_X86
+ if (unlikely(vector == XEN_NMI_VECTOR)) {
+ int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);
+ if (rc < 0)
+ printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
+ return;
+ }
+#endif
+ irq = per_cpu(ipi_to_irq, cpu)[vector];
BUG_ON(irq < 0);
notify_remote_via_irq(irq);
}
@@ -1379,14 +1390,21 @@ static void __xen_evtchn_do_upcall(void)
pending_bits = active_evtchns(cpu, s, word_idx);
bit_idx = 0; /* usually scan entire word from start */
+ /*
+ * We scan the starting word in two parts.
+ *
+ * 1st time: start in the middle, scanning the
+ * upper bits.
+ *
+ * 2nd time: scan the whole word (not just the
+ * parts skipped in the first pass) -- if an
+ * event in the previously scanned bits is
+ * pending again it would just be scanned on
+ * the next loop anyway.
+ */
if (word_idx == start_word_idx) {
- /* We scan the starting word in two parts */
if (i == 0)
- /* 1st time: start in the middle */
bit_idx = start_bit_idx;
- else
- /* 2nd time: mask bits done already */
- bit_idx &= (1UL << start_bit_idx) - 1;
}
do {
@@ -1493,8 +1511,10 @@ void rebind_evtchn_irq(int evtchn, int irq)
/* Rebind an evtchn so that it gets delivered to a specific cpu */
static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
{
+ struct shared_info *s = HYPERVISOR_shared_info;
struct evtchn_bind_vcpu bind_vcpu;
int evtchn = evtchn_from_irq(irq);
+ int masked;
if (!VALID_EVTCHN(evtchn))
return -1;
@@ -1511,6 +1531,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
bind_vcpu.vcpu = tcpu;
/*
+ * Mask the event while changing the VCPU binding to prevent
+ * it being delivered on an unexpected VCPU.
+ */
+ masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask));
+
+ /*
* If this fails, it usually just indicates that we're dealing with a
* virq or IPI channel, which don't actually need to be rebound. Ignore
* it, but don't do the xenlinux-level rebind in that case.
@@ -1518,6 +1544,9 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
bind_evtchn_to_cpu(evtchn, tcpu);
+ if (!masked)
+ unmask_evtchn(evtchn);
+
return 0;
}
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 8feecf01d55..8b3a69a06c3 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -57,6 +57,7 @@
struct per_user_data {
struct mutex bind_mutex; /* serialize bind/unbind operations */
+ struct rb_root evtchns;
/* Notification ring, accessed via /dev/xen/evtchn. */
#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
@@ -64,6 +65,7 @@ struct per_user_data {
evtchn_port_t *ring;
unsigned int ring_cons, ring_prod, ring_overflow;
struct mutex ring_cons_mutex; /* protect against concurrent readers */
+ spinlock_t ring_prod_lock; /* protect against concurrent interrupts */
/* Processes wait on this queue when ring is empty. */
wait_queue_head_t evtchn_wait;
@@ -71,54 +73,79 @@ struct per_user_data {
const char *name;
};
-/*
- * Who's bound to each port? This is logically an array of struct
- * per_user_data *, but we encode the current enabled-state in bit 0.
- */
-static unsigned long *port_user;
-static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
+struct user_evtchn {
+ struct rb_node node;
+ struct per_user_data *user;
+ unsigned port;
+ bool enabled;
+};
-static inline struct per_user_data *get_port_user(unsigned port)
+static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
- return (struct per_user_data *)(port_user[port] & ~1);
-}
+ struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
-static inline void set_port_user(unsigned port, struct per_user_data *u)
-{
- port_user[port] = (unsigned long)u;
+ while (*new) {
+ struct user_evtchn *this;
+
+ this = container_of(*new, struct user_evtchn, node);
+
+ parent = *new;
+ if (this->port < evtchn->port)
+ new = &((*new)->rb_left);
+ else if (this->port > evtchn->port)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&evtchn->node, parent, new);
+ rb_insert_color(&evtchn->node, &u->evtchns);
+
+ return 0;
}
-static inline bool get_port_enabled(unsigned port)
+static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
- return port_user[port] & 1;
+ rb_erase(&evtchn->node, &u->evtchns);
+ kfree(evtchn);
}
-static inline void set_port_enabled(unsigned port, bool enabled)
+static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
{
- if (enabled)
- port_user[port] |= 1;
- else
- port_user[port] &= ~1;
+ struct rb_node *node = u->evtchns.rb_node;
+
+ while (node) {
+ struct user_evtchn *evtchn;
+
+ evtchn = container_of(node, struct user_evtchn, node);
+
+ if (evtchn->port < port)
+ node = node->rb_left;
+ else if (evtchn->port > port)
+ node = node->rb_right;
+ else
+ return evtchn;
+ }
+ return NULL;
}
static irqreturn_t evtchn_interrupt(int irq, void *data)
{
- unsigned int port = (unsigned long)data;
- struct per_user_data *u;
+ struct user_evtchn *evtchn = data;
+ struct per_user_data *u = evtchn->user;
- spin_lock(&port_user_lock);
-
- u = get_port_user(port);
-
- WARN(!get_port_enabled(port),
+ WARN(!evtchn->enabled,
"Interrupt for port %d, but apparently not enabled; per-user %p\n",
- port, u);
+ evtchn->port, u);
disable_irq_nosync(irq);
- set_port_enabled(port, false);
+ evtchn->enabled = false;
+
+ spin_lock(&u->ring_prod_lock);
if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
- u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+ u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
wmb(); /* Ensure ring contents visible */
if (u->ring_cons == u->ring_prod++) {
wake_up_interruptible(&u->evtchn_wait);
@@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
} else
u->ring_overflow = 1;
- spin_unlock(&port_user_lock);
+ spin_unlock(&u->ring_prod_lock);
return IRQ_HANDLED;
}
@@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
if (copy_from_user(kbuf, buf, count) != 0)
goto out;
- spin_lock_irq(&port_user_lock);
+ mutex_lock(&u->bind_mutex);
for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
unsigned port = kbuf[i];
+ struct user_evtchn *evtchn;
- if (port < NR_EVENT_CHANNELS &&
- get_port_user(port) == u &&
- !get_port_enabled(port)) {
- set_port_enabled(port, true);
+ evtchn = find_evtchn(u, port);
+ if (evtchn && !evtchn->enabled) {
+ evtchn->enabled = true;
enable_irq(irq_from_evtchn(port));
}
}
- spin_unlock_irq(&port_user_lock);
+ mutex_unlock(&u->bind_mutex);
rc = count;
@@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
static int evtchn_bind_to_user(struct per_user_data *u, int port)
{
+ struct user_evtchn *evtchn;
+ struct evtchn_close close;
int rc = 0;
/*
@@ -263,35 +292,46 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
* interrupt handler yet, and our caller has already
* serialized bind operations.)
*/
- BUG_ON(get_port_user(port) != NULL);
- set_port_user(port, u);
- set_port_enabled(port, true); /* start enabled */
+
+ evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
+ if (!evtchn)
+ return -ENOMEM;
+
+ evtchn->user = u;
+ evtchn->port = port;
+ evtchn->enabled = true; /* start enabled */
+
+ rc = add_evtchn(u, evtchn);
+ if (rc < 0)
+ goto err;
rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
- u->name, (void *)(unsigned long)port);
- if (rc >= 0)
- rc = evtchn_make_refcounted(port);
- else {
- /* bind failed, should close the port now */
- struct evtchn_close close;
- close.port = port;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
- set_port_user(port, NULL);
- }
+ u->name, evtchn);
+ if (rc < 0)
+ goto err;
+
+ rc = evtchn_make_refcounted(port);
+ return rc;
+err:
+ /* bind failed, should close the port now */
+ close.port = port;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ BUG();
+ del_evtchn(u, evtchn);
return rc;
}
-static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+static void evtchn_unbind_from_user(struct per_user_data *u,
+ struct user_evtchn *evtchn)
{
- int irq = irq_from_evtchn(port);
+ int irq = irq_from_evtchn(evtchn->port);
BUG_ON(irq < 0);
- unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+ unbind_from_irqhandler(irq, evtchn);
- set_port_user(port, NULL);
+ del_evtchn(u, evtchn);
}
static long evtchn_ioctl(struct file *file,
@@ -370,6 +410,7 @@ static long evtchn_ioctl(struct file *file,
case IOCTL_EVTCHN_UNBIND: {
struct ioctl_evtchn_unbind unbind;
+ struct user_evtchn *evtchn;
rc = -EFAULT;
if (copy_from_user(&unbind, uarg, sizeof(unbind)))
@@ -379,36 +420,28 @@ static long evtchn_ioctl(struct file *file,
if (unbind.port >= NR_EVENT_CHANNELS)
break;
- spin_lock_irq(&port_user_lock);
-
rc = -ENOTCONN;
- if (get_port_user(unbind.port) != u) {
- spin_unlock_irq(&port_user_lock);
+ evtchn = find_evtchn(u, unbind.port);
+ if (!evtchn)
break;
- }
disable_irq(irq_from_evtchn(unbind.port));
-
- spin_unlock_irq(&port_user_lock);
-
- evtchn_unbind_from_user(u, unbind.port);
-
+ evtchn_unbind_from_user(u, evtchn);
rc = 0;
break;
}
case IOCTL_EVTCHN_NOTIFY: {
struct ioctl_evtchn_notify notify;
+ struct user_evtchn *evtchn;
rc = -EFAULT;
if (copy_from_user(&notify, uarg, sizeof(notify)))
break;
- if (notify.port >= NR_EVENT_CHANNELS) {
- rc = -EINVAL;
- } else if (get_port_user(notify.port) != u) {
- rc = -ENOTCONN;
- } else {
+ rc = -ENOTCONN;
+ evtchn = find_evtchn(u, notify.port);
+ if (evtchn) {
notify_remote_via_evtchn(notify.port);
rc = 0;
}
@@ -418,9 +451,9 @@ static long evtchn_ioctl(struct file *file,
case IOCTL_EVTCHN_RESET: {
/* Initialise the ring to empty. Clear errors. */
mutex_lock(&u->ring_cons_mutex);
- spin_lock_irq(&port_user_lock);
+ spin_lock_irq(&u->ring_prod_lock);
u->ring_cons = u->ring_prod = u->ring_overflow = 0;
- spin_unlock_irq(&port_user_lock);
+ spin_unlock_irq(&u->ring_prod_lock);
mutex_unlock(&u->ring_cons_mutex);
rc = 0;
break;
@@ -479,6 +512,7 @@ static int evtchn_open(struct inode *inode, struct file *filp)
mutex_init(&u->bind_mutex);
mutex_init(&u->ring_cons_mutex);
+ spin_lock_init(&u->ring_prod_lock);
filp->private_data = u;
@@ -487,29 +521,18 @@ static int evtchn_open(struct inode *inode, struct file *filp)
static int evtchn_release(struct inode *inode, struct file *filp)
{
- int i;
struct per_user_data *u = filp->private_data;
+ struct rb_node *node;
- spin_lock_irq(&port_user_lock);
-
- free_page((unsigned long)u->ring);
-
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- if (get_port_user(i) != u)
- continue;
-
- disable_irq(irq_from_evtchn(i));
- }
-
- spin_unlock_irq(&port_user_lock);
-
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- if (get_port_user(i) != u)
- continue;
+ while ((node = u->evtchns.rb_node)) {
+ struct user_evtchn *evtchn;
- evtchn_unbind_from_user(get_port_user(i), i);
+ evtchn = rb_entry(node, struct user_evtchn, node);
+ disable_irq(irq_from_evtchn(evtchn->port));
+ evtchn_unbind_from_user(u, evtchn);
}
+ free_page((unsigned long)u->ring);
kfree(u->name);
kfree(u);
@@ -540,12 +563,6 @@ static int __init evtchn_init(void)
if (!xen_domain())
return -ENODEV;
- port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
- if (port_user == NULL)
- return -ENOMEM;
-
- spin_lock_init(&port_user_lock);
-
/* Create '/dev/xen/evtchn'. */
err = misc_register(&evtchn_miscdev);
if (err != 0) {
@@ -560,9 +577,6 @@ static int __init evtchn_init(void)
static void __exit evtchn_cleanup(void)
{
- kfree(port_user);
- port_user = NULL;
-
misc_deregister(&evtchn_miscdev);
}
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eab5427c75f..e41c79c986e 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -272,19 +272,12 @@ static int map_grant_pages(struct grant_map *map)
* with find_grant_ptes.
*/
for (i = 0; i < map->count; i++) {
- unsigned level;
unsigned long address = (unsigned long)
pfn_to_kaddr(page_to_pfn(map->pages[i]));
- pte_t *ptep;
- u64 pte_maddr = 0;
BUG_ON(PageHighMem(map->pages[i]));
- ptep = lookup_address(address, &level);
- pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
- gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
- map->flags |
- GNTMAP_host_map |
- GNTMAP_contains_pte,
+ gnttab_set_map_op(&map->kmap_ops[i], address,
+ map->flags | GNTMAP_host_map,
map->grants[i].ref,
map->grants[i].domid);
}
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 04cdeb8e371..c4d2298893b 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -730,9 +730,18 @@ void gnttab_request_free_callback(struct gnttab_free_callback *callback,
void (*fn)(void *), void *arg, u16 count)
{
unsigned long flags;
+ struct gnttab_free_callback *cb;
+
spin_lock_irqsave(&gnttab_list_lock, flags);
- if (callback->next)
- goto out;
+
+ /* Check if the callback is already on the list */
+ cb = gnttab_free_callback_list;
+ while (cb) {
+ if (cb == callback)
+ goto out;
+ cb = cb->next;
+ }
+
callback->fn = fn;
callback->arg = arg;
callback->count = count;
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index f8e5dd701ec..8e74590fa1b 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -43,9 +43,10 @@ MODULE_LICENSE("GPL");
#define PRIV_VMA_LOCKED ((void *)1)
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
-#endif
+static int privcmd_vma_range_is_mapped(
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long nr_pages);
static long privcmd_ioctl_hypercall(void __user *udata)
{
@@ -225,9 +226,9 @@ static long privcmd_ioctl_mmap(void __user *udata)
vma = find_vma(mm, msg->va);
rc = -EINVAL;
- if (!vma || (msg->va != vma->vm_start) ||
- !privcmd_enforce_singleshot_mapping(vma))
+ if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
goto out_up;
+ vma->vm_private_data = PRIV_VMA_LOCKED;
}
state.va = vma->vm_start;
@@ -358,7 +359,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
kfree(pages);
return -ENOMEM;
}
- BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
+ BUG_ON(vma->vm_private_data != NULL);
vma->vm_private_data = pages;
return 0;
@@ -421,19 +422,43 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
vma = find_vma(mm, m.addr);
if (!vma ||
- vma->vm_ops != &privcmd_vm_ops ||
- (m.addr != vma->vm_start) ||
- ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
- !privcmd_enforce_singleshot_mapping(vma)) {
- up_write(&mm->mmap_sem);
+ vma->vm_ops != &privcmd_vm_ops) {
ret = -EINVAL;
- goto out;
+ goto out_unlock;
}
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- ret = alloc_empty_pages(vma, m.num);
- if (ret < 0) {
- up_write(&mm->mmap_sem);
- goto out;
+
+ /*
+ * Caller must either:
+ *
+ * Map the whole VMA range, which will also allocate all the
+ * pages required for the auto_translated_physmap case.
+ *
+ * Or
+ *
+ * Map unmapped holes left from a previous map attempt (e.g.,
+ * because those foreign frames were previously paged out).
+ */
+ if (vma->vm_private_data == NULL) {
+ if (m.addr != vma->vm_start ||
+ m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ ret = alloc_empty_pages(vma, m.num);
+ if (ret < 0)
+ goto out_unlock;
+ } else
+ vma->vm_private_data = PRIV_VMA_LOCKED;
+ } else {
+ if (m.addr < vma->vm_start ||
+ m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
+ ret = -EINVAL;
+ goto out_unlock;
}
}
@@ -466,8 +491,11 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
out:
free_page_list(&pagelist);
-
return ret;
+
+out_unlock:
+ up_write(&mm->mmap_sem);
+ goto out;
}
static long privcmd_ioctl(struct file *file,
@@ -540,9 +568,24 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+/*
+ * For MMAPBATCH*. This allows asserting the singleshot mapping
+ * on a per pfn/pte basis. Mapping calls that fail with ENOENT
+ * can then be retried until they succeed.
+ */
+static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
+ unsigned long addr, void *data)
+{
+ return pte_none(*pte) ? 0 : -EBUSY;
+}
+
+static int privcmd_vma_range_is_mapped(
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long nr_pages)
{
- return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
+ return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
+ is_mapped_fn, NULL) != 0;
}
const struct file_operations xen_privcmd_fops = {
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index aadffcf7db9..1b2277c311d 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -506,13 +506,13 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
to do proper error handling. */
xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
attrs);
- sgl[0].dma_length = 0;
+ sg_dma_len(sgl) = 0;
return DMA_ERROR_CODE;
}
sg->dma_address = xen_phys_to_bus(map);
} else
sg->dma_address = dev_addr;
- sg->dma_length = sg->length;
+ sg_dma_len(sg) = sg->length;
}
return nelems;
}
@@ -533,7 +533,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i)
- xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+ xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
}
EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs);
@@ -555,7 +555,7 @@ xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
for_each_sg(sgl, sg, nelems, i)
xen_swiotlb_sync_single(hwdev, sg->dma_address,
- sg->dma_length, dir, target);
+ sg_dma_len(sg), dir, target);
}
void
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 02817a85f87..21e18c18c7a 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -265,8 +265,10 @@ static ssize_t store_selfballooning(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &tmp);
- if (err || ((tmp != 0) && (tmp != 1)))
+ err = kstrtoul(buf, 10, &tmp);
+ if (err)
+ return err;
+ if ((tmp != 0) && (tmp != 1))
return -EINVAL;
xen_selfballooning_enabled = !!tmp;
@@ -292,8 +294,10 @@ static ssize_t store_selfballoon_interval(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_interval = val;
return count;
@@ -314,8 +318,10 @@ static ssize_t store_selfballoon_downhys(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_downhysteresis = val;
return count;
@@ -337,8 +343,10 @@ static ssize_t store_selfballoon_uphys(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_uphysteresis = val;
return count;
@@ -360,8 +368,10 @@ static ssize_t store_selfballoon_min_usable_mb(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_min_usable_mb = val;
return count;
@@ -384,8 +394,10 @@ static ssize_t store_selfballoon_reserved_mb(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_reserved_mb = val;
return count;
@@ -410,8 +422,10 @@ static ssize_t store_frontswap_selfshrinking(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &tmp);
- if (err || ((tmp != 0) && (tmp != 1)))
+ err = kstrtoul(buf, 10, &tmp);
+ if (err)
+ return err;
+ if ((tmp != 0) && (tmp != 1))
return -EINVAL;
frontswap_selfshrinking = !!tmp;
if (!was_enabled && !xen_selfballooning_enabled &&
@@ -437,8 +451,10 @@ static ssize_t store_frontswap_inertia(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
frontswap_inertia = val;
frontswap_inertia_counter = val;
@@ -460,8 +476,10 @@ static ssize_t store_frontswap_hysteresis(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
frontswap_hysteresis = val;
return count;
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 6ed8a9df447..34b20bfa4e8 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -115,7 +115,6 @@ static int xenbus_frontend_dev_resume(struct device *dev)
return -EFAULT;
}
- INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume);
queue_work(xenbus_frontend_wq, &xdev->work);
return 0;
@@ -124,6 +123,16 @@ static int xenbus_frontend_dev_resume(struct device *dev)
return xenbus_dev_resume(dev);
}
+static int xenbus_frontend_dev_probe(struct device *dev)
+{
+ if (xen_store_domain_type == XS_LOCAL) {
+ struct xenbus_device *xdev = to_xenbus_device(dev);
+ INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume);
+ }
+
+ return xenbus_dev_probe(dev);
+}
+
static const struct dev_pm_ops xenbus_pm_ops = {
.suspend = xenbus_dev_suspend,
.resume = xenbus_frontend_dev_resume,
@@ -142,7 +151,7 @@ static struct xen_bus_type xenbus_frontend = {
.name = "xen",
.match = xenbus_match,
.uevent = xenbus_uevent_frontend,
- .probe = xenbus_dev_probe,
+ .probe = xenbus_frontend_dev_probe,
.remove = xenbus_dev_remove,
.shutdown = xenbus_dev_shutdown,
.dev_attrs = xenbus_dev_attrs,
@@ -474,7 +483,11 @@ static int __init xenbus_probe_frontend_init(void)
register_xenstore_notifier(&xenstore_notifier);
- xenbus_frontend_wq = create_workqueue("xenbus_frontend");
+ if (xen_store_domain_type == XS_LOCAL) {
+ xenbus_frontend_wq = create_workqueue("xenbus_frontend");
+ if (!xenbus_frontend_wq)
+ pr_warn("create xenbus frontend workqueue failed, S3 resume is likely to fail\n");
+ }
return 0;
}