-rw-r--r--  arch/x86/include/asm/xen/page.h | 6
-rw-r--r--  arch/x86/kernel/kprobes.c | 4
-rw-r--r--  arch/x86/pci/xen.c | 32
-rw-r--r--  arch/x86/xen/Kconfig | 11
-rw-r--r--  arch/x86/xen/enlighten.c | 1
-rw-r--r--  arch/x86/xen/mmu.c | 52
-rw-r--r--  arch/x86/xen/p2m.c | 128
-rw-r--r--  arch/x86/xen/setup.c | 284
-rw-r--r--  drivers/block/xen-blkback/blkback.c | 2
-rw-r--r--  drivers/pci/xen-pcifront.c | 5
-rw-r--r--  drivers/xen/Kconfig | 10
-rw-r--r--  drivers/xen/Makefile | 4
-rw-r--r--  drivers/xen/balloon.c | 62
-rw-r--r--  drivers/xen/events.c | 47
-rw-r--r--  drivers/xen/gntdev.c | 39
-rw-r--r--  drivers/xen/grant-table.c | 6
-rw-r--r--  drivers/xen/pci.c | 105
-rw-r--r--  drivers/xen/swiotlb-xen.c | 70
-rw-r--r--  drivers/xen/xen-pciback/conf_space.c | 1
-rw-r--r--  drivers/xen/xen-pciback/conf_space_header.c | 5
-rw-r--r--  drivers/xen/xen-pciback/conf_space_quirks.c | 3
-rw-r--r--  drivers/xen/xen-pciback/passthrough.c | 34
-rw-r--r--  drivers/xen/xen-pciback/pci_stub.c | 35
-rw-r--r--  drivers/xen/xen-pciback/pciback.h | 32
-rw-r--r--  drivers/xen/xen-pciback/pciback_ops.c | 1
-rw-r--r--  drivers/xen/xen-pciback/vpci.c | 35
-rw-r--r--  drivers/xen/xen-pciback/xenbus.c | 27
-rw-r--r--  drivers/xen/xen-selfballoon.c | 67
-rw-r--r--  drivers/xen/xenbus/xenbus_comms.c | 4
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c | 101
-rw-r--r--  drivers/xen/xenbus/xenbus_probe_backend.c | 2
-rw-r--r--  drivers/xen/xenbus/xenbus_probe_frontend.c | 121
-rw-r--r--  drivers/xen/xenbus/xenbus_xs.c | 17
-rw-r--r--  include/xen/balloon.h | 5
-rw-r--r--  include/xen/grant_table.h | 1
-rw-r--r--  include/xen/interface/io/xs_wire.h | 6
-rw-r--r--  include/xen/interface/physdev.h | 34
-rw-r--r--  include/xen/page.h | 12
38 files changed, 913 insertions(+), 498 deletions(-)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 7ff4669580c..c34f96c2f7a 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -12,6 +12,7 @@
#include <asm/pgtable.h>
#include <xen/interface/xen.h>
+#include <xen/grant_table.h>
#include <xen/features.h>
/* Xen machine address */
@@ -48,14 +49,11 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e);
extern int m2p_add_override(unsigned long mfn, struct page *page,
- bool clear_pte);
+ struct gnttab_map_grant_ref *kmap_op);
extern int m2p_remove_override(struct page *page, bool clear_pte);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
-#ifdef CONFIG_XEN_DEBUG_FS
-extern int p2m_dump_show(struct seq_file *m, void *v);
-#endif
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
unsigned long mfn;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index f1a6244d7d9..794bc95134c 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -75,8 +75,10 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
/*
* Undefined/reserved opcodes, conditional jump, Opcode Extension
* Groups, and some special opcodes can not boost.
+ * This is non-const to keep gcc from statically optimizing it out, as
+ * variable_test_bit makes gcc think only *(unsigned long*) is used.
*/
-static const u32 twobyte_is_boostable[256 / 32] = {
+static u32 twobyte_is_boostable[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */
W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
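
The W() rows pack one "boostable" bit per two-byte opcode, 32 opcodes per u32. A minimal sketch of how such a table is consulted (the helper name is illustrative; the kernel itself goes through test_bit(), whose inline asm is what hides the access from gcc's optimizer and motivates dropping the const):

	static int opcode_is_boostable(u32 *bitmap, unsigned char opcode)
	{
		/* word index = opcode / 32, bit index = opcode % 32 */
		return (bitmap[opcode / 32] >> (opcode % 32)) & 1;
	}
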
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 1017c7bee38..492ade8c978 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -175,8 +175,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
"pcifront-msi-x" :
"pcifront-msi",
DOMID_SELF);
- if (irq < 0)
+ if (irq < 0) {
+ ret = irq;
goto free;
+ }
i++;
}
kfree(v);
@@ -221,8 +223,10 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (msg.data != XEN_PIRQ_MSI_DATA ||
xen_irq_from_pirq(pirq) < 0) {
pirq = xen_allocate_pirq_msi(dev, msidesc);
- if (pirq < 0)
+ if (pirq < 0) {
+ irq = -ENODEV;
goto error;
+ }
xen_msi_compose_msg(dev, pirq, &msg);
__write_msi_msg(msidesc, &msg);
dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
@@ -244,10 +248,12 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
error:
dev_err(&dev->dev,
"Xen PCI frontend has not registered MSI/MSI-X support!\n");
- return -ENODEV;
+ return irq;
}
#ifdef CONFIG_XEN_DOM0
+static bool __read_mostly pci_seg_supported = true;
+
static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
int ret = 0;
@@ -265,10 +271,11 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
memset(&map_irq, 0, sizeof(map_irq));
map_irq.domid = domid;
- map_irq.type = MAP_PIRQ_TYPE_MSI;
+ map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
map_irq.index = -1;
map_irq.pirq = -1;
- map_irq.bus = dev->bus->number;
+ map_irq.bus = dev->bus->number |
+ (pci_domain_nr(dev->bus) << 16);
map_irq.devfn = dev->devfn;
if (type == PCI_CAP_ID_MSIX) {
@@ -285,7 +292,20 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
}
- ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+ ret = -EINVAL;
+ if (pci_seg_supported)
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+ &map_irq);
+ if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
+ map_irq.type = MAP_PIRQ_TYPE_MSI;
+ map_irq.index = -1;
+ map_irq.pirq = -1;
+ map_irq.bus = dev->bus->number;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+ &map_irq);
+ if (ret != -EINVAL)
+ pci_seg_supported = false;
+ }
if (ret) {
dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
ret, domid);
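
Distilled, the hunk above is a probe-and-latch fallback: try the segment-aware hypercall once, and if the hypervisor rejects it as unknown, drop to the legacy call and remember that for all later devices. A condensed sketch (do_map_pirq() stands in for the two HYPERVISOR_physdev_op() calls; the real code additionally refuses the legacy path for devices on a nonzero segment, which the old interface cannot express):

	static int map_pirq_with_fallback(struct physdev_map_pirq *seg,
					  struct physdev_map_pirq *legacy)
	{
		int ret = -EINVAL;

		if (pci_seg_supported)
			ret = do_map_pirq(seg);		/* MAP_PIRQ_TYPE_MSI_SEG */
		if (ret == -EINVAL) {
			ret = do_map_pirq(legacy);	/* MAP_PIRQ_TYPE_MSI */
			if (ret != -EINVAL)
				pci_seg_supported = false; /* latch for later devices */
		}
		return ret;
	}
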
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5cc821cb2e0..26c731a106a 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -25,8 +25,7 @@ config XEN_PRIVILEGED_GUEST
config XEN_PVHVM
def_bool y
- depends on XEN
- depends on X86_LOCAL_APIC
+ depends on XEN && PCI && X86_LOCAL_APIC
config XEN_MAX_DOMAIN_MEMORY
int
@@ -49,11 +48,3 @@ config XEN_DEBUG_FS
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
-
-config XEN_DEBUG
- bool "Enable Xen debug checks"
- depends on XEN
- default n
- help
- Enable various WARN_ON checks in the Xen MMU code.
- Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2d69617950f..da8afd576a6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -251,6 +251,7 @@ static void __init xen_init_cpuid_mask(void)
~((1 << X86_FEATURE_APIC) | /* disable local APIC */
(1 << X86_FEATURE_ACPI)); /* disable ACPI */
ax = 1;
+ cx = 0;
xen_cpuid(&ax, &bx, &cx, &dx);
xsave_mask =
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3dd53f997b1..87f6673b120 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -495,41 +495,6 @@ static pte_t xen_make_pte(pteval_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
-#ifdef CONFIG_XEN_DEBUG
-pte_t xen_make_pte_debug(pteval_t pte)
-{
- phys_addr_t addr = (pte & PTE_PFN_MASK);
- phys_addr_t other_addr;
- bool io_page = false;
- pte_t _pte;
-
- if (pte & _PAGE_IOMAP)
- io_page = true;
-
- _pte = xen_make_pte(pte);
-
- if (!addr)
- return _pte;
-
- if (io_page &&
- (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
- other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
- WARN_ONCE(addr != other_addr,
- "0x%lx is using VM_IO, but it is 0x%lx!\n",
- (unsigned long)addr, (unsigned long)other_addr);
- } else {
- pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
- other_addr = (_pte.pte & PTE_PFN_MASK);
- WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
- "0x%lx is missing VM_IO (and wasn't fixed)!\n",
- (unsigned long)addr);
- }
-
- return _pte;
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
-#endif
-
static pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
@@ -1992,9 +1957,6 @@ void __init xen_ident_map_ISA(void)
static void __init xen_post_allocator_init(void)
{
-#ifdef CONFIG_XEN_DEBUG
- pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
-#endif
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
@@ -2404,17 +2366,3 @@ out:
return err;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
-
-#ifdef CONFIG_XEN_DEBUG_FS
-static int p2m_dump_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, p2m_dump_show, NULL);
-}
-
-static const struct file_operations p2m_dump_fops = {
- .open = p2m_dump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 58efeb9d544..1b267e75158 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,7 +161,9 @@
#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
+#include <xen/grant_table.h>
+#include "multicalls.h"
#include "xen-ops.h"
static void __init m2p_override_init(void);
@@ -676,7 +678,8 @@ static unsigned long mfn_hash(unsigned long mfn)
}
/* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
+int m2p_add_override(unsigned long mfn, struct page *page,
+ struct gnttab_map_grant_ref *kmap_op)
{
unsigned long flags;
unsigned long pfn;
@@ -692,16 +695,28 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
"m2p_add_override: pfn %lx not mapped", pfn))
return -EINVAL;
}
-
- page->private = mfn;
+ WARN_ON(PagePrivate(page));
+ SetPagePrivate(page);
+ set_page_private(page, mfn);
page->index = pfn_to_mfn(pfn);
if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
return -ENOMEM;
- if (clear_pte && !PageHighMem(page))
- /* Just zap old mapping for now */
- pte_clear(&init_mm, address, ptep);
+ if (kmap_op != NULL) {
+ if (!PageHighMem(page)) {
+ struct multicall_space mcs =
+ xen_mc_entry(sizeof(*kmap_op));
+
+ MULTI_grant_table_op(mcs.mc,
+ GNTTABOP_map_grant_ref, kmap_op, 1);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+ }
+ /* let's use dev_bus_addr to record the old mfn instead */
+ kmap_op->dev_bus_addr = page->index;
+ page->index = (unsigned long) kmap_op;
+ }
spin_lock_irqsave(&m2p_override_lock, flags);
list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
spin_unlock_irqrestore(&m2p_override_lock, flags);
@@ -735,13 +750,56 @@ int m2p_remove_override(struct page *page, bool clear_pte)
spin_lock_irqsave(&m2p_override_lock, flags);
list_del(&page->lru);
spin_unlock_irqrestore(&m2p_override_lock, flags);
- set_phys_to_machine(pfn, page->index);
+ WARN_ON(!PagePrivate(page));
+ ClearPagePrivate(page);
- if (clear_pte && !PageHighMem(page))
- set_pte_at(&init_mm, address, ptep,
- pfn_pte(pfn, PAGE_KERNEL));
- /* No tlb flush necessary because the caller already
- * left the pte unmapped. */
+ if (clear_pte) {
+ struct gnttab_map_grant_ref *map_op =
+ (struct gnttab_map_grant_ref *) page->index;
+ set_phys_to_machine(pfn, map_op->dev_bus_addr);
+ if (!PageHighMem(page)) {
+ struct multicall_space mcs;
+ struct gnttab_unmap_grant_ref *unmap_op;
+
+ /*
+ * It might be that we queued all the m2p grant table
+ * hypercalls in a multicall, then m2p_remove_override
+ * gets called before the multicall has actually been
+ * issued. In this case handle is going to be -1 because
+ * it hasn't been modified yet.
+ */
+ if (map_op->handle == -1)
+ xen_mc_flush();
+ /*
+ * Now if map_op->handle is negative it means that the
+ * hypercall actually returned an error.
+ */
+ if (map_op->handle == GNTST_general_error) {
+ printk(KERN_WARNING "m2p_remove_override: "
+ "pfn %lx mfn %lx, failed to modify kernel mappings",
+ pfn, mfn);
+ return -1;
+ }
+
+ mcs = xen_mc_entry(
+ sizeof(struct gnttab_unmap_grant_ref));
+ unmap_op = mcs.args;
+ unmap_op->host_addr = map_op->host_addr;
+ unmap_op->handle = map_op->handle;
+ unmap_op->dev_bus_addr = 0;
+
+ MULTI_grant_table_op(mcs.mc,
+ GNTTABOP_unmap_grant_ref, unmap_op, 1);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+ set_pte_at(&init_mm, address, ptep,
+ pfn_pte(pfn, PAGE_KERNEL));
+ __flush_tlb_single(address);
+ map_op->host_addr = 0;
+ }
+ } else
+ set_phys_to_machine(pfn, page->index);
return 0;
}
@@ -758,7 +816,7 @@ struct page *m2p_find_override(unsigned long mfn)
spin_lock_irqsave(&m2p_override_lock, flags);
list_for_each_entry(p, bucket, lru) {
- if (p->private == mfn) {
+ if (page_private(p) == mfn) {
ret = p;
break;
}
@@ -782,17 +840,21 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
#ifdef CONFIG_XEN_DEBUG_FS
-
-int p2m_dump_show(struct seq_file *m, void *v)
+#include <linux/debugfs.h>
+#include "debugfs.h"
+static int p2m_dump_show(struct seq_file *m, void *v)
{
static const char * const level_name[] = { "top", "middle",
- "entry", "abnormal" };
- static const char * const type_name[] = { "identity", "missing",
- "pfn", "abnormal"};
+ "entry", "abnormal", "error"};
#define TYPE_IDENTITY 0
#define TYPE_MISSING 1
#define TYPE_PFN 2
#define TYPE_UNKNOWN 3
+ static const char * const type_name[] = {
+ [TYPE_IDENTITY] = "identity",
+ [TYPE_MISSING] = "missing",
+ [TYPE_PFN] = "pfn",
+ [TYPE_UNKNOWN] = "abnormal"};
unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
unsigned int uninitialized_var(prev_level);
unsigned int uninitialized_var(prev_type);
@@ -856,4 +918,32 @@ int p2m_dump_show(struct seq_file *m, void *v)
#undef TYPE_PFN
#undef TYPE_UNKNOWN
}
-#endif
+
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+ .open = p2m_dump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static struct dentry *d_mmu_debug;
+
+static int __init xen_p2m_debugfs(void)
+{
+ struct dentry *d_xen = xen_init_debugfs();
+
+ if (d_xen == NULL)
+ return -ENOMEM;
+
+ d_mmu_debug = debugfs_create_dir("mmu", d_xen);
+
+ debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
+ return 0;
+}
+fs_initcall(xen_p2m_debugfs);
+#endif /* CONFIG_XEN_DEBUG_FS */
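
With the fs_initcall in place, the dump is readable from user space once debugfs is mounted. A minimal reader, assuming the conventional /sys/kernel/debug mount point:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int fd = open("/sys/kernel/debug/xen/mmu/p2m", O_RDONLY);

		if (fd < 0)
			return 1;
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);	/* pfn ranges with type/level */
		close(fd);
		return 0;
	}
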
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 46d6d21dbdb..38d0af4fefe 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -37,7 +37,10 @@ extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);
/* Amount of extra memory space we add to the e820 ranges */
-phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
+/* Number of pages released from the initial allocation. */
+unsigned long xen_released_pages;
/*
* The maximum amount of extra memory compared to the base size. The
@@ -51,48 +54,47 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
*/
#define EXTRA_MEM_RATIO (10)
-static void __init xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(u64 start, u64 size)
{
unsigned long pfn;
+ int i;
- u64 size = (u64)pages * PAGE_SIZE;
- u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
-
- if (!pages)
- return;
-
- e820_add_region(extra_start, size, E820_RAM);
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
- memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
+ for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+ /* Add new region. */
+ if (xen_extra_mem[i].size == 0) {
+ xen_extra_mem[i].start = start;
+ xen_extra_mem[i].size = size;
+ break;
+ }
+ /* Append to existing region. */
+ if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
+ xen_extra_mem[i].size += size;
+ break;
+ }
+ }
+ if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+ printk(KERN_WARNING "Warning: not enough extra memory regions\n");
- xen_extra_mem_size += size;
+ memblock_x86_reserve_range(start, start + size, "XEN EXTRA");
- xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+ xen_max_p2m_pfn = PFN_DOWN(start + size);
- for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+ for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
-static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
- phys_addr_t end_addr)
+static unsigned long __init xen_release_chunk(unsigned long start,
+ unsigned long end)
{
struct xen_memory_reservation reservation = {
.address_bits = 0,
.extent_order = 0,
.domid = DOMID_SELF
};
- unsigned long start, end;
unsigned long len = 0;
unsigned long pfn;
int ret;
- start = PFN_UP(start_addr);
- end = PFN_DOWN(end_addr);
-
- if (end <= start)
- return 0;
-
for(pfn = start; pfn < end; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
@@ -117,72 +119,52 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
return len;
}
-static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
- const struct e820map *e820)
+static unsigned long __init xen_set_identity_and_release(
+ const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
- phys_addr_t max_addr = PFN_PHYS(max_pfn);
- phys_addr_t last_end = ISA_END_ADDRESS;
+ phys_addr_t start = 0;
unsigned long released = 0;
- int i;
-
- /* Free any unused memory above the low 1Mbyte. */
- for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
- phys_addr_t end = e820->map[i].addr;
- end = min(max_addr, end);
-
- if (last_end < end)
- released += xen_release_chunk(last_end, end);
- last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
- }
-
- if (last_end < max_addr)
- released += xen_release_chunk(last_end, max_addr);
-
- printk(KERN_INFO "released %lu pages of unused memory\n", released);
- return released;
-}
-
-static unsigned long __init xen_set_identity(const struct e820entry *list,
- ssize_t map_size)
-{
- phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
- phys_addr_t start_pci = last;
- const struct e820entry *entry;
unsigned long identity = 0;
+ const struct e820entry *entry;
int i;
+ /*
+ * Combine non-RAM regions and gaps until a RAM region (or the
+ * end of the map) is reached, then set the 1:1 map and
+ * release the pages (if available) in those non-RAM regions.
+ *
+ * The combined non-RAM regions are rounded to a whole number
+ * of pages so any partial pages are accessible via the 1:1
+ * mapping. This is needed for some BIOSes that put (for
+ * example) the DMI tables in a reserved region that begins on
+ * a non-page boundary.
+ */
for (i = 0, entry = list; i < map_size; i++, entry++) {
- phys_addr_t start = entry->addr;
- phys_addr_t end = start + entry->size;
+ phys_addr_t end = entry->addr + entry->size;
- if (start < last)
- start = last;
+ if (entry->type == E820_RAM || i == map_size - 1) {
+ unsigned long start_pfn = PFN_DOWN(start);
+ unsigned long end_pfn = PFN_UP(end);
- if (end <= start)
- continue;
+ if (entry->type == E820_RAM)
+ end_pfn = PFN_UP(entry->addr);
- /* Skip over the 1MB region. */
- if (last > end)
- continue;
+ if (start_pfn < end_pfn) {
+ if (start_pfn < nr_pages)
+ released += xen_release_chunk(
+ start_pfn, min(end_pfn, nr_pages));
- if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
- if (start > start_pci)
identity += set_phys_range_identity(
- PFN_UP(start_pci), PFN_DOWN(start));
-
- /* Without saving 'last' we would gooble RAM too
- * at the end of the loop. */
- last = end;
- start_pci = end;
- continue;
+ start_pfn, end_pfn);
+ }
+ start = end;
}
- start_pci = min(start, start_pci);
- last = end;
}
- if (last > start_pci)
- identity += set_phys_range_identity(
- PFN_UP(start_pci), PFN_DOWN(last));
- return identity;
+
+ printk(KERN_INFO "Released %lu pages of unused memory\n", released);
+ printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+
+ return released;
}
static unsigned long __init xen_get_max_pages(void)
@@ -197,21 +179,32 @@ static unsigned long __init xen_get_max_pages(void)
return min(max_pages, MAX_DOMAIN_PAGES);
}
+static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
+{
+ u64 end = start + size;
+
+ /* Align RAM regions to page boundaries. */
+ if (type == E820_RAM) {
+ start = PAGE_ALIGN(start);
+ end &= ~((u64)PAGE_SIZE - 1);
+ }
+
+ e820_add_region(start, end - start, type);
+}
+
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
char * __init xen_memory_setup(void)
{
static struct e820entry map[E820MAX] __initdata;
- static struct e820entry map_raw[E820MAX] __initdata;
unsigned long max_pfn = xen_start_info->nr_pages;
unsigned long long mem_end;
int rc;
struct xen_memory_map memmap;
+ unsigned long max_pages;
unsigned long extra_pages = 0;
- unsigned long extra_limit;
- unsigned long identity_pages = 0;
int i;
int op;
@@ -237,58 +230,65 @@ char * __init xen_memory_setup(void)
}
BUG_ON(rc);
- memcpy(map_raw, map, sizeof(map));
- e820.nr_map = 0;
- xen_extra_mem_start = mem_end;
- for (i = 0; i < memmap.nr_entries; i++) {
- unsigned long long end;
-
- /* Guard against non-page aligned E820 entries. */
- if (map[i].type == E820_RAM)
- map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
-
- end = map[i].addr + map[i].size;
- if (map[i].type == E820_RAM && end > mem_end) {
- /* RAM off the end - may be partially included */
- u64 delta = min(map[i].size, end - mem_end);
-
- map[i].size -= delta;
- end -= delta;
-
- extra_pages += PFN_DOWN(delta);
- /*
- * Set RAM below 4GB that is not for us to be unusable.
- * This prevents "System RAM" address space from being
- * used as potential resource for I/O address (happens
- * when 'allocate_resource' is called).
- */
- if (delta &&
- (xen_initial_domain() && end < 0x100000000ULL))
- e820_add_region(end, delta, E820_UNUSABLE);
+ /* Make sure the Xen-supplied memory map is well-ordered. */
+ sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
+
+ max_pages = xen_get_max_pages();
+ if (max_pages > max_pfn)
+ extra_pages += max_pages - max_pfn;
+
+ /*
+ * Set P2M for all non-RAM pages and E820 gaps to be identity
+ * type PFNs. Any RAM pages that would be made inaccessible by
+ * this are first released.
+ */
+ xen_released_pages = xen_set_identity_and_release(
+ map, memmap.nr_entries, max_pfn);
+ extra_pages += xen_released_pages;
+
+ /*
+ * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+ * factor the base size. On non-highmem systems, the base
+ * size is the full initial memory allocation; on highmem it
+ * is limited to the max size of lowmem, so that it doesn't
+ * get completely filled.
+ *
+ * In principle there could be a problem in lowmem systems if
+ * the initial memory is also very large with respect to
+ * lowmem, but we won't try to deal with that here.
+ */
+ extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+ extra_pages);
+
+ i = 0;
+ while (i < memmap.nr_entries) {
+ u64 addr = map[i].addr;
+ u64 size = map[i].size;
+ u32 type = map[i].type;
+
+ if (type == E820_RAM) {
+ if (addr < mem_end) {
+ size = min(size, mem_end - addr);
+ } else if (extra_pages) {
+ size = min(size, (u64)extra_pages * PAGE_SIZE);
+ extra_pages -= size / PAGE_SIZE;
+ xen_add_extra_mem(addr, size);
+ } else
+ type = E820_UNUSABLE;
}
- if (map[i].size > 0 && end > xen_extra_mem_start)
- xen_extra_mem_start = end;
+ xen_align_and_add_e820_region(addr, size, type);
- /* Add region if any remains */
- if (map[i].size > 0)
- e820_add_region(map[i].addr, map[i].size, map[i].type);
+ map[i].addr += size;
+ map[i].size -= size;
+ if (map[i].size == 0)
+ i++;
}
- /* Align the balloon area so that max_low_pfn does not get set
- * to be at the _end_ of the PCI gap at the far end (fee01000).
- * Note that xen_extra_mem_start gets set in the loop above to be
- * past the last E820 region. */
- if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
- xen_extra_mem_start = (1ULL<<32);
/*
* In domU, the ISA region is normal, usable memory, but we
* reserve ISA memory anyway because too many things poke
* about in there.
- *
- * In Dom0, the host E820 information can leave gaps in the
- * ISA range, which would cause us to release those pages. To
- * avoid this, we unconditionally reserve them here.
*/
e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
E820_RESERVED);
@@ -305,44 +305,6 @@ char * __init xen_memory_setup(void)
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- extra_limit = xen_get_max_pages();
- if (max_pfn + extra_pages > extra_limit) {
- if (extra_limit > max_pfn)
- extra_pages = extra_limit - max_pfn;
- else
- extra_pages = 0;
- }
-
- extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
-
- /*
- * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
- * factor the base size. On non-highmem systems, the base
- * size is the full initial memory allocation; on highmem it
- * is limited to the max size of lowmem, so that it doesn't
- * get completely filled.
- *
- * In principle there could be a problem in lowmem systems if
- * the initial memory is also very large with respect to
- * lowmem, but we won't try to deal with that here.
- */
- extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
- max_pfn + extra_pages);
-
- if (extra_limit >= max_pfn)
- extra_pages = extra_limit - max_pfn;
- else
- extra_pages = 0;
-
- xen_add_extra_mem(extra_pages);
-
- /*
- * Set P2M for all non-RAM pages and E820 gaps to be identity
- * type PFNs. We supply it with the non-sanitized version
- * of the E820.
- */
- identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
- printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
return "Xen";
}
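
To make the trimming in xen_align_and_add_e820_region() concrete, a standalone sketch of the same arithmetic on a made-up RAM entry (only RAM is rounded; other types are added as-is):

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SIZE	4096ULL
	#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

	int main(void)
	{
		uint64_t start = 0x1234, end = 0x5234;

		start = PAGE_ALIGN(start);	/* rounds up to 0x2000 */
		end &= ~(PAGE_SIZE - 1);	/* rounds down to 0x5000 */
		printf("RAM entry trimmed to [%#llx, %#llx)\n",
		       (unsigned long long)start, (unsigned long long)end);
		return 0;
	}
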
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 2330a9ad5e9..1540792b1e5 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -396,7 +396,7 @@ static int xen_blkbk_map(struct blkif_request *req,
continue;
ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
- blkbk->pending_page(pending_req, i), false);
+ blkbk->pending_page(pending_req, i), NULL);
if (ret) {
pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
(unsigned long)map[i].dev_bus_addr, ret);
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 6fa215a3861..90832a95599 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -400,9 +400,8 @@ static int pcifront_claim_resource(struct pci_dev *dev, void *data)
dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
pci_name(dev), i);
if (pci_claim_resource(dev, i)) {
- dev_err(&pdev->xdev->dev, "Could not claim "
- "resource %s/%d! Device offline. Try "
- "giving less than 4GB to domain.\n",
+ dev_err(&pdev->xdev->dev, "Could not claim resource %s/%d! "
+ "Device offline. Try using e820_host=1 in the guest config.\n",
pci_name(dev), i);
}
}
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 5f7ff8e2fc1..8795480c235 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -137,16 +137,6 @@ config XEN_GRANT_DEV_ALLOC
to other domains. This can be used to implement frontend drivers
or as part of an inter-domain shared memory channel.
-config XEN_PLATFORM_PCI
- tristate "xen platform pci device driver"
- depends on XEN_PVHVM && PCI
- default m
- help
- Driver for the Xen PCI Platform device: it is responsible for
- initializing xenbus and grant_table when running in a Xen HVM
- domain. As a consequence this driver is required to run any Xen PV
- frontend on Xen HVM.
-
config SWIOTLB_XEN
def_bool y
depends on PCI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 72bbb27d7a6..974fffdf22b 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
-obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
+obj-$(CONFIG_XEN_PVHVM) += platform-pci.o
obj-$(CONFIG_XEN_TMEM) += tmem.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_DOM0) += pci.o
@@ -23,5 +23,3 @@ obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
xen-gntalloc-y := gntalloc.o
-
-xen-platform-pci-y := platform-pci.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 5dfd8f8ff07..5876e1ae6c2 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -501,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
* alloc_xenballooned_pages - get pages that have been ballooned out
* @nr_pages: Number of pages to get
* @pages: pages returned
+ * @highmem: highmem or lowmem pages
* @return 0 on success, error otherwise
*/
-int alloc_xenballooned_pages(int nr_pages, struct page** pages)
+int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
{
int pgno = 0;
struct page* page;
mutex_lock(&balloon_mutex);
while (pgno < nr_pages) {
- page = balloon_retrieve(true);
- if (page) {
+ page = balloon_retrieve(highmem);
+ if (page && PageHighMem(page) == highmem) {
pages[pgno++] = page;
} else {
enum bp_state st;
- st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
+ if (page)
+ balloon_append(page);
+ st = decrease_reservation(nr_pages - pgno,
+ highmem ? GFP_HIGHUSER : GFP_USER);
if (st != BP_DONE)
goto out_undo;
}
@@ -555,17 +559,40 @@ void free_xenballooned_pages(int nr_pages, struct page** pages)
}
EXPORT_SYMBOL(free_xenballooned_pages);
-static int __init balloon_init(void)
+static void __init balloon_add_region(unsigned long start_pfn,
+ unsigned long pages)
{
unsigned long pfn, extra_pfn_end;
struct page *page;
+ /*
+ * If the amount of usable memory has been limited (e.g., with
+ * the 'mem' command line parameter), don't add pages beyond
+ * this limit.
+ */
+ extra_pfn_end = min(max_pfn, start_pfn + pages);
+
+ for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
+ page = pfn_to_page(pfn);
+ /* totalram_pages and totalhigh_pages do not
+ include the boot-time balloon extension, so
+ don't subtract from it. */
+ __balloon_append(page);
+ }
+}
+
+static int __init balloon_init(void)
+{
+ int i;
+
if (!xen_domain())
return -ENODEV;
pr_info("xen/balloon: Initialising balloon driver.\n");
- balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn;
+ balloon_stats.current_pages = xen_pv_domain()
+ ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
+ : max_pfn;
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
@@ -584,24 +611,13 @@ static int __init balloon_init(void)
#endif
/*
- * Initialise the balloon with excess memory space. We need
- * to make sure we don't add memory which doesn't exist or
- * logically exist. The E820 map can be trimmed to be smaller
- * than the amount of physical memory due to the mem= command
- * line parameter. And if this is a 32-bit non-HIGHMEM kernel
- * on a system with memory which requires highmem to access,
- * don't try to use it.
+ * Initialize the balloon with pages from the extra memory
+ * regions (see arch/x86/xen/setup.c).
*/
- extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
- (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
- for (pfn = PFN_UP(xen_extra_mem_start);
- pfn < extra_pfn_end;
- pfn++) {
- page = pfn_to_page(pfn);
- /* totalram_pages and totalhigh_pages do not include the boot-time
- balloon extension, so don't subtract from it. */
- __balloon_append(page);
- }
+ for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
+ if (xen_extra_mem[i].size)
+ balloon_add_region(PFN_UP(xen_extra_mem[i].start),
+ PFN_DOWN(xen_extra_mem[i].size));
return 0;
}
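
A hedged usage fragment for the extended allocator (not a complete driver): callers that must reach the pages through the kernel linear mapping, as gntdev now does for its kmap_ops, have to request lowmem explicitly.

	struct page *pages[16];
	int rc = alloc_xenballooned_pages(16, pages, false /* lowmem */);

	if (rc)
		return rc;
	/* ... map grants into the pages ... */
	free_xenballooned_pages(16, pages);
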
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 7523719bf8a..7a55b292bf3 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -432,7 +432,8 @@ static int __must_check xen_allocate_irq_dynamic(void)
irq = irq_alloc_desc_from(first, -1);
- xen_irq_init(irq);
+ if (irq >= 0)
+ xen_irq_init(irq);
return irq;
}
@@ -713,7 +714,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
mutex_lock(&irq_mapping_update_lock);
irq = xen_allocate_irq_dynamic();
- if (irq == -1)
+ if (irq < 0)
goto out;
irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
@@ -729,7 +730,7 @@ out:
error_irq:
mutex_unlock(&irq_mapping_update_lock);
xen_free_irq(irq);
- return -1;
+ return ret;
}
#endif
@@ -779,7 +780,7 @@ int xen_irq_from_pirq(unsigned pirq)
mutex_lock(&irq_mapping_update_lock);
list_for_each_entry(info, &xen_irq_list_head, list) {
- if (info == NULL || info->type != IRQT_PIRQ)
+ if (info->type != IRQT_PIRQ)
continue;
irq = info->irq;
if (info->u.pirq.pirq == pirq)
@@ -872,11 +873,32 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
}
+static int find_virq(unsigned int virq, unsigned int cpu)
+{
+ struct evtchn_status status;
+ int port, rc = -ENOENT;
+
+ memset(&status, 0, sizeof(status));
+ for (port = 0; port < NR_EVENT_CHANNELS; port++) {
+ status.dom = DOMID_SELF;
+ status.port = port;
+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
+ if (rc < 0)
+ continue;
+ if (status.status != EVTCHNSTAT_virq)
+ continue;
+ if (status.u.virq == virq && status.vcpu == cpu) {
+ rc = port;
+ break;
+ }
+ }
+ return rc;
+}
int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
- int evtchn, irq;
+ int evtchn, irq, ret;
mutex_lock(&irq_mapping_update_lock);
@@ -892,10 +914,16 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
bind_virq.virq = virq;
bind_virq.vcpu = cpu;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
- &bind_virq) != 0)
- BUG();
- evtchn = bind_virq.port;
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq);
+ if (ret == 0)
+ evtchn = bind_virq.port;
+ else {
+ if (ret == -EEXIST)
+ ret = find_virq(virq, cpu);
+ BUG_ON(ret < 0);
+ evtchn = ret;
+ }
xen_irq_info_virq_init(cpu, irq, evtchn, virq);
@@ -1670,6 +1698,7 @@ void __init xen_init_IRQ(void)
evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
GFP_KERNEL);
+ BUG_ON(!evtchn_to_irq);
for (i = 0; i < NR_EVENT_CHANNELS; i++)
evtchn_to_irq[i] = -1;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index f914b26cf0c..880798aae2f 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -83,6 +83,7 @@ struct grant_map {
struct ioctl_gntdev_grant_ref *grants;
struct gnttab_map_grant_ref *map_ops;
struct gnttab_unmap_grant_ref *unmap_ops;
+ struct gnttab_map_grant_ref *kmap_ops;
struct page **pages;
};
@@ -116,19 +117,22 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL);
add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL);
add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
+ add->kmap_ops = kzalloc(sizeof(add->kmap_ops[0]) * count, GFP_KERNEL);
add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
+ NULL == add->kmap_ops ||
NULL == add->pages)
goto err;
- if (alloc_xenballooned_pages(count, add->pages))
+ if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */))
goto err;
for (i = 0; i < count; i++) {
add->map_ops[i].handle = -1;
add->unmap_ops[i].handle = -1;
+ add->kmap_ops[i].handle = -1;
}
add->index = 0;
@@ -142,6 +146,7 @@ err:
kfree(add->grants);
kfree(add->map_ops);
kfree(add->unmap_ops);
+ kfree(add->kmap_ops);
kfree(add);
return NULL;
}
@@ -243,10 +248,35 @@ static int map_grant_pages(struct grant_map *map)
gnttab_set_unmap_op(&map->unmap_ops[i], addr,
map->flags, -1 /* handle */);
}
+ } else {
+ /*
+ * Set up the map_ops corresponding to the pte entries pointing
+ * to the kernel linear addresses of the struct pages.
+ * These ptes are completely different from the user ptes dealt
+ * with by find_grant_ptes.
+ */
+ for (i = 0; i < map->count; i++) {
+ unsigned level;
+ unsigned long address = (unsigned long)
+ pfn_to_kaddr(page_to_pfn(map->pages[i]));
+ pte_t *ptep;
+ u64 pte_maddr = 0;
+ BUG_ON(PageHighMem(map->pages[i]));
+
+ ptep = lookup_address(address, &level);
+ pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
+ gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
+ map->flags |
+ GNTMAP_host_map |
+ GNTMAP_contains_pte,
+ map->grants[i].ref,
+ map->grants[i].domid);
+ }
}
pr_debug("map %d+%d\n", map->index, map->count);
- err = gnttab_map_refs(map->map_ops, map->pages, map->count);
+ err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
+ map->pages, map->count);
if (err)
return err;
@@ -462,13 +492,11 @@ static int gntdev_release(struct inode *inode, struct file *flip)
pr_debug("priv %p\n", priv);
- spin_lock(&priv->lock);
while (!list_empty(&priv->maps)) {
map = list_entry(priv->maps.next, struct grant_map, next);
list_del(&map->next);
gntdev_put_map(map);
}
- spin_unlock(&priv->lock);
if (use_ptemod)
mmu_notifier_unregister(&priv->mn, priv->mm);
@@ -532,10 +560,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
if (map) {
list_del(&map->next);
- gntdev_put_map(map);
err = 0;
}
spin_unlock(&priv->lock);
+ if (map)
+ gntdev_put_map(map);
return err;
}
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 4f44b347b24..8c71ab80175 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -448,7 +448,8 @@ unsigned int gnttab_max_grant_frames(void)
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
- struct page **pages, unsigned int count)
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count)
{
int i, ret;
pte_t *pte;
@@ -488,8 +489,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
*/
return -EOPNOTSUPP;
}
- ret = m2p_add_override(mfn, pages[i],
- map_ops[i].flags & GNTMAP_contains_pte);
+ ret = m2p_add_override(mfn, pages[i], kmap_ops ?
+ &kmap_ops[i] : NULL);
if (ret)
return ret;
}
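
The extra parameter is optional: passing NULL for kmap_ops (as blkback effectively does by handing NULL to m2p_add_override above) leaves kernel mappings untouched. The two calling styles, as fragments:

	/* no kernel PTE fixup wanted */
	ret = gnttab_map_refs(map_ops, NULL, pages, count);

	/* gntdev with use_ptemod: also switch the kernel linear mapping */
	ret = gnttab_map_refs(map->map_ops, map->kmap_ops,
			      map->pages, map->count);
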
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index cef4bafc07d..66057075d6e 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -18,6 +18,7 @@
*/
#include <linux/pci.h>
+#include <linux/acpi.h>
#include <xen/xen.h>
#include <xen/interface/physdev.h>
#include <xen/interface/xen.h>
@@ -26,26 +27,85 @@
#include <asm/xen/hypercall.h>
#include "../pci/pci.h"
+static bool __read_mostly pci_seg_supported = true;
+
static int xen_add_device(struct device *dev)
{
int r;
struct pci_dev *pci_dev = to_pci_dev(dev);
+#ifdef CONFIG_PCI_IOV
+ struct pci_dev *physfn = pci_dev->physfn;
+#endif
+
+ if (pci_seg_supported) {
+ struct physdev_pci_device_add add = {
+ .seg = pci_domain_nr(pci_dev->bus),
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn
+ };
+#ifdef CONFIG_ACPI
+ acpi_handle handle;
+#endif
+
+#ifdef CONFIG_PCI_IOV
+ if (pci_dev->is_virtfn) {
+ add.flags = XEN_PCI_DEV_VIRTFN;
+ add.physfn.bus = physfn->bus->number;
+ add.physfn.devfn = physfn->devfn;
+ } else
+#endif
+ if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
+ add.flags = XEN_PCI_DEV_EXTFN;
+
+#ifdef CONFIG_ACPI
+ handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+ if (!handle)
+ handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+#ifdef CONFIG_PCI_IOV
+ if (!handle && pci_dev->is_virtfn)
+ handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+#endif
+ if (handle) {
+ acpi_status status;
+
+ do {
+ unsigned long long pxm;
+
+ status = acpi_evaluate_integer(handle, "_PXM",
+ NULL, &pxm);
+ if (ACPI_SUCCESS(status)) {
+ add.optarr[0] = pxm;
+ add.flags |= XEN_PCI_DEV_PXM;
+ break;
+ }
+ status = acpi_get_parent(handle, &handle);
+ } while (ACPI_SUCCESS(status));
+ }
+#endif /* CONFIG_ACPI */
+
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
+ if (r != -ENOSYS)
+ return r;
+ pci_seg_supported = false;
+ }
+ if (pci_domain_nr(pci_dev->bus))
+ r = -ENOSYS;
#ifdef CONFIG_PCI_IOV
- if (pci_dev->is_virtfn) {
+ else if (pci_dev->is_virtfn) {
struct physdev_manage_pci_ext manage_pci_ext = {
.bus = pci_dev->bus->number,
.devfn = pci_dev->devfn,
.is_virtfn = 1,
- .physfn.bus = pci_dev->physfn->bus->number,
- .physfn.devfn = pci_dev->physfn->devfn,
+ .physfn.bus = physfn->bus->number,
+ .physfn.devfn = physfn->devfn,
};
r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
&manage_pci_ext);
- } else
+ }
#endif
- if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
+ else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
struct physdev_manage_pci_ext manage_pci_ext = {
.bus = pci_dev->bus->number,
.devfn = pci_dev->devfn,
@@ -71,13 +131,27 @@ static int xen_remove_device(struct device *dev)
{
int r;
struct pci_dev *pci_dev = to_pci_dev(dev);
- struct physdev_manage_pci manage_pci;
- manage_pci.bus = pci_dev->bus->number;
- manage_pci.devfn = pci_dev->devfn;
+ if (pci_seg_supported) {
+ struct physdev_pci_device device = {
+ .seg = pci_domain_nr(pci_dev->bus),
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn
+ };
- r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
- &manage_pci);
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove,
+ &device);
+ } else if (pci_domain_nr(pci_dev->bus))
+ r = -ENOSYS;
+ else {
+ struct physdev_manage_pci manage_pci = {
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn
+ };
+
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+ &manage_pci);
+ }
return r;
}
@@ -96,13 +170,16 @@ static int xen_pci_notifier(struct notifier_block *nb,
r = xen_remove_device(dev);
break;
default:
- break;
+ return NOTIFY_DONE;
}
-
- return r;
+ if (r)
+ dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n",
+ action == BUS_NOTIFY_ADD_DEVICE ? "add" :
+ (action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?"));
+ return NOTIFY_OK;
}
-struct notifier_block device_nb = {
+static struct notifier_block device_nb = {
.notifier_call = xen_pci_notifier,
};
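
For reference, the return-value convention the notifier now follows, as a minimal sketch (example_notifier is illustrative): unhandled actions report NOTIFY_DONE, handled ones NOTIFY_OK, and errors are only logged because the notifier chain cannot veto a device add or remove.

	static int example_notifier(struct notifier_block *nb,
				    unsigned long action, void *data)
	{
		if (action != BUS_NOTIFY_ADD_DEVICE &&
		    action != BUS_NOTIFY_DEL_DEVICE)
			return NOTIFY_DONE;	/* not ours */
		/* handle the device; dev_err() on failure */
		return NOTIFY_OK;
	}
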
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 6e8c15a2320..c984768d98c 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -38,6 +38,7 @@
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
+#include <xen/hvc-console.h>
/*
* Used to do a quick range check in swiotlb_tbl_unmap_single and
* swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -146,8 +147,10 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
void __init xen_swiotlb_init(int verbose)
{
unsigned long bytes;
- int rc;
+ int rc = -ENOMEM;
unsigned long nr_tbl;
+ char *m = NULL;
+ unsigned int repeat = 3;
nr_tbl = swioltb_nr_tbl();
if (nr_tbl)
@@ -156,16 +159,17 @@ void __init xen_swiotlb_init(int verbose)
xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
}
-
+retry:
bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
/*
* Get IO TLB memory from any location.
*/
xen_io_tlb_start = alloc_bootmem(bytes);
- if (!xen_io_tlb_start)
- panic("Cannot allocate SWIOTLB buffer");
-
+ if (!xen_io_tlb_start) {
+ m = "Cannot allocate Xen-SWIOTLB buffer!\n";
+ goto error;
+ }
xen_io_tlb_end = xen_io_tlb_start + bytes;
/*
* And replace that memory with pages under 4GB.
@@ -173,17 +177,28 @@ void __init xen_swiotlb_init(int verbose)
rc = xen_swiotlb_fixup(xen_io_tlb_start,
bytes,
xen_io_tlb_nslabs);
- if (rc)
+ if (rc) {
+ free_bootmem(__pa(xen_io_tlb_start), bytes);
+ m = "Failed to get contiguous memory for DMA from Xen!\n"\
+ "You either: don't have the permissions, do not have"\
+ " enough free memory under 4GB, or the hypervisor memory"\
+ "is too fragmented!";
goto error;
-
+ }
start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
return;
error:
- panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\
- "We either don't have the permission or you do not have enough"\
- "free memory under 4GB!\n", rc);
+ if (repeat--) {
+ xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
+ (xen_io_tlb_nslabs >> 1));
+ printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n",
+ (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+ goto retry;
+ }
+ xen_raw_printk("%s (rc:%d)", m, rc);
+ panic("%s (rc:%d)", m, rc);
}
void *
@@ -194,6 +209,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
int order = get_order(size);
u64 dma_mask = DMA_BIT_MASK(32);
unsigned long vstart;
+ phys_addr_t phys;
+ dma_addr_t dev_addr;
/*
* Ignore region specifiers - the kernel's ideas of
@@ -209,18 +226,26 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
vstart = __get_free_pages(flags, order);
ret = (void *)vstart;
+ if (!ret)
+ return ret;
+
if (hwdev && hwdev->coherent_dma_mask)
- dma_mask = dma_alloc_coherent_mask(hwdev, flags);
+ dma_mask = hwdev->coherent_dma_mask;
- if (ret) {
+ phys = virt_to_phys(ret);
+ dev_addr = xen_phys_to_bus(phys);
+ if (((dev_addr + size - 1 <= dma_mask)) &&
+ !range_straddles_page_boundary(phys, size))
+ *dma_handle = dev_addr;
+ else {
if (xen_create_contiguous_region(vstart, order,
fls64(dma_mask)) != 0) {
free_pages(vstart, order);
return NULL;
}
- memset(ret, 0, size);
*dma_handle = virt_to_machine(ret).maddr;
}
+ memset(ret, 0, size);
return ret;
}
EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent);
@@ -230,11 +255,21 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
dma_addr_t dev_addr)
{
int order = get_order(size);
+ phys_addr_t phys;
+ u64 dma_mask = DMA_BIT_MASK(32);
if (dma_release_from_coherent(hwdev, order, vaddr))
return;
- xen_destroy_contiguous_region((unsigned long)vaddr, order);
+ if (hwdev && hwdev->coherent_dma_mask)
+ dma_mask = hwdev->coherent_dma_mask;
+
+ phys = virt_to_phys(vaddr);
+
+ if (((dev_addr + size - 1 > dma_mask)) ||
+ range_straddles_page_boundary(phys, size))
+ xen_destroy_contiguous_region((unsigned long)vaddr, order);
+
free_pages((unsigned long)vaddr, order);
}
EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
@@ -278,9 +313,10 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
/*
* Ensure that the address returned is DMA'ble
*/
- if (!dma_capable(dev, dev_addr, size))
- panic("map_single: bounce buffer is not DMA'ble");
-
+ if (!dma_capable(dev, dev_addr, size)) {
+ swiotlb_tbl_unmap_single(dev, map, size, dir);
+ dev_addr = 0;
+ }
return dev_addr;
}
EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
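
As a sanity check on the retry sizing: with the usual IO_TLB_SHIFT of 11 (2 KB slabs), the 64 MB default is 32768 slabs, so successive retries step down through 32 MB, 16 MB and 8 MB, and the max() keeps any attempt from shrinking below 1024 slabs, i.e. 2 MB.
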
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index a8031445d94..444345afbd5 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -15,7 +15,6 @@
#include "conf_space.h"
#include "conf_space_quirks.h"
-#define DRV_NAME "xen-pciback"
static int permissive;
module_param(permissive, bool, 0644);
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index da3cbdfcb5d..3daf862d739 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -15,7 +15,6 @@ struct pci_bar_info {
int which;
};
-#define DRV_NAME "xen-pciback"
#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
@@ -25,7 +24,7 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
int ret;
ret = xen_pcibk_read_config_word(dev, offset, value, data);
- if (!atomic_read(&dev->enable_cnt))
+ if (!pci_is_enabled(dev))
return ret;
for (i = 0; i < PCI_ROM_RESOURCE; i++) {
@@ -187,7 +186,7 @@ static inline void read_dev_bar(struct pci_dev *dev,
bar_info->val = res[pos].start |
(res[pos].flags & PCI_REGION_FLAG_MASK);
- bar_info->len_val = res[pos].end - res[pos].start + 1;
+ bar_info->len_val = resource_size(&res[pos]);
}
static void *bar_init(struct pci_dev *dev, int offset)
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c
index 921a889e65e..7476791cab4 100644
--- a/drivers/xen/xen-pciback/conf_space_quirks.c
+++ b/drivers/xen/xen-pciback/conf_space_quirks.c
@@ -12,7 +12,6 @@
#include "conf_space_quirks.h"
LIST_HEAD(xen_pcibk_quirks);
-#define DRV_NAME "xen-pciback"
static inline const struct pci_device_id *
match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
{
@@ -36,7 +35,7 @@ static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
goto out;
tmp_quirk = NULL;
printk(KERN_DEBUG DRV_NAME
- ":quirk didn't match any device xen_pciback knows about\n");
+ ": quirk didn't match any device known\n");
out:
return tmp_quirk;
}
diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
index 1d32a9a42c0..828dddc360d 100644
--- a/drivers/xen/xen-pciback/passthrough.c
+++ b/drivers/xen/xen-pciback/passthrough.c
@@ -7,13 +7,13 @@
#include <linux/list.h>
#include <linux/pci.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include "pciback.h"
struct passthrough_dev_data {
/* Access to dev_list must be protected by lock */
struct list_head dev_list;
- spinlock_t lock;
+ struct mutex lock;
};
static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
@@ -24,9 +24,8 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry;
struct pci_dev *dev = NULL;
- unsigned long flags;
- spin_lock_irqsave(&dev_data->lock, flags);
+ mutex_lock(&dev_data->lock);
list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
@@ -37,7 +36,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
}
}
- spin_unlock_irqrestore(&dev_data->lock, flags);
+ mutex_unlock(&dev_data->lock);
return dev;
}
@@ -48,7 +47,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry;
- unsigned long flags;
unsigned int domain, bus, devfn;
int err;
@@ -57,9 +55,9 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
return -ENOMEM;
dev_entry->dev = dev;
- spin_lock_irqsave(&dev_data->lock, flags);
+ mutex_lock(&dev_data->lock);
list_add_tail(&dev_entry->list, &dev_data->dev_list);
- spin_unlock_irqrestore(&dev_data->lock, flags);
+ mutex_unlock(&dev_data->lock);
/* Publish this device. */
domain = (unsigned int)pci_domain_nr(dev->bus);
@@ -76,9 +74,8 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry, *t;
struct pci_dev *found_dev = NULL;
- unsigned long flags;
- spin_lock_irqsave(&dev_data->lock, flags);
+ mutex_lock(&dev_data->lock);
list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
if (dev_entry->dev == dev) {
@@ -88,7 +85,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
}
}
- spin_unlock_irqrestore(&dev_data->lock, flags);
+ mutex_unlock(&dev_data->lock);
if (found_dev)
pcistub_put_pci_dev(found_dev);
@@ -102,7 +99,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
if (!dev_data)
return -ENOMEM;
- spin_lock_init(&dev_data->lock);
+ mutex_init(&dev_data->lock);
INIT_LIST_HEAD(&dev_data->dev_list);
@@ -116,14 +113,14 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
{
int err = 0;
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
- struct pci_dev_entry *dev_entry, *e, *tmp;
+ struct pci_dev_entry *dev_entry, *e;
struct pci_dev *dev;
int found;
unsigned int domain, bus;
- spin_lock(&dev_data->lock);
+ mutex_lock(&dev_data->lock);
- list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) {
+ list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
/* Only publish this device as a root if none of its
* parent bridges are exported
*/
@@ -142,16 +139,13 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
bus = (unsigned int)dev_entry->dev->bus->number;
if (!found) {
- spin_unlock(&dev_data->lock);
err = publish_root_cb(pdev, domain, bus);
if (err)
break;
- spin_lock(&dev_data->lock);
}
}
- if (!err)
- spin_unlock(&dev_data->lock);
+ mutex_unlock(&dev_data->lock);
return err;
}
@@ -182,7 +176,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
return 1;
}
-struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
+const struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
.name = "passthrough",
.init = __xen_pcibk_init_devices,
.free = __xen_pcibk_release_devices,
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index aec214ac0a1..8f06e1ed028 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -21,8 +21,6 @@
#include "conf_space.h"
#include "conf_space_quirks.h"
-#define DRV_NAME "xen-pciback"
-
static char *pci_devs_to_hide;
wait_queue_head_t xen_pcibk_aer_wait_queue;
/*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops,
@@ -222,6 +220,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
}
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ if (WARN_ON(!found_psdev))
+ return;
/*hold this lock for avoiding breaking link between
* pcistub and xen_pcibk when AER is in processing
@@ -514,12 +514,9 @@ static void kill_domain_by_device(struct pcistub_device *psdev)
int err;
char nodename[PCI_NODENAME_MAX];
- if (!psdev)
- dev_err(&psdev->dev->dev,
- "device is NULL when do AER recovery/kill_domain\n");
+ BUG_ON(!psdev);
snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
psdev->pdev->xdev->otherend_id);
- nodename[strlen(nodename)] = '\0';
again:
err = xenbus_transaction_start(&xbt);
@@ -605,7 +602,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
if (test_bit(_XEN_PCIF_active,
(unsigned long *)&psdev->pdev->sh_info->flags)) {
dev_dbg(&psdev->dev->dev,
- "schedule pci_conf service in xen_pcibk\n");
+ "schedule pci_conf service in " DRV_NAME "\n");
xen_pcibk_test_and_schedule_op(psdev->pdev);
}
@@ -995,8 +992,7 @@ out:
err = count;
return err;
}
-
-DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
+static DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
size_t count)
@@ -1015,8 +1011,7 @@ out:
err = count;
return err;
}
-
-DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
+static DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
{
@@ -1039,8 +1034,7 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
return count;
}
-
-DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
+static DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
{
@@ -1069,8 +1063,7 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
return count;
}
-
-DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
+static DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
const char *buf,
@@ -1106,7 +1099,8 @@ out:
err = count;
return err;
}
-DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
+static DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL,
+ pcistub_irq_handler_switch);
static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
size_t count)
@@ -1170,8 +1164,8 @@ out:
return count;
}
-
-DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
+static DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show,
+ pcistub_quirk_add);
static ssize_t permissive_add(struct device_driver *drv, const char *buf,
size_t count)
@@ -1236,8 +1230,8 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf)
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
return count;
}
-
-DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
+static DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show,
+ permissive_add);
static void pcistub_exit(void)
{
@@ -1374,3 +1368,4 @@ module_init(xen_pcibk_init);
module_exit(xen_pcibk_cleanup);
MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("xen-backend:pci");
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index a0e131a8150..e9b4011c5f9 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -15,6 +15,8 @@
#include <linux/atomic.h>
#include <xen/interface/io/pciif.h>
+#define DRV_NAME "xen-pciback"
+
struct pci_dev_entry {
struct list_head list;
struct pci_dev *dev;
@@ -27,7 +29,7 @@ struct pci_dev_entry {
struct xen_pcibk_device {
void *pci_dev_data;
- spinlock_t dev_lock;
+ struct mutex dev_lock;
struct xenbus_device *xdev;
struct xenbus_watch be_watch;
u8 be_watching;
@@ -89,7 +91,7 @@ typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
* passthrough - BDFs are exactly the same as in the host.
*/
struct xen_pcibk_backend {
- char *name;
+ const char *name;
int (*init)(struct xen_pcibk_device *pdev);
void (*free)(struct xen_pcibk_device *pdev);
int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
@@ -104,9 +106,9 @@ struct xen_pcibk_backend {
unsigned int devfn);
};
-extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
-extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
-extern struct xen_pcibk_backend *xen_pcibk_backend;
+extern const struct xen_pcibk_backend xen_pcibk_vpci_backend;
+extern const struct xen_pcibk_backend xen_pcibk_passthrough_backend;
+extern const struct xen_pcibk_backend *xen_pcibk_backend;
static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev,
@@ -116,13 +118,14 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
if (xen_pcibk_backend && xen_pcibk_backend->add)
return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
return -1;
-};
+}
+
static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
{
if (xen_pcibk_backend && xen_pcibk_backend->free)
return xen_pcibk_backend->release(pdev, dev);
-};
+}
static inline struct pci_dev *
xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
@@ -131,7 +134,8 @@ xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
if (xen_pcibk_backend && xen_pcibk_backend->get)
return xen_pcibk_backend->get(pdev, domain, bus, devfn);
return NULL;
-};
+}
+
/**
* Added for dom0 PCIe AER handling. Get the guest domain/bus/devfn in xen_pcibk
* before sending the AER request to pcifront, so that the guest can identify
@@ -148,25 +152,29 @@ static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
devfn);
return -1;
-};
+}
+
static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
{
if (xen_pcibk_backend && xen_pcibk_backend->init)
return xen_pcibk_backend->init(pdev);
return -1;
-};
+}
+
static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
publish_pci_root_cb cb)
{
if (xen_pcibk_backend && xen_pcibk_backend->publish)
return xen_pcibk_backend->publish(pdev, cb);
return -1;
-};
+}
+
static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
{
if (xen_pcibk_backend && xen_pcibk_backend->free)
return xen_pcibk_backend->free(pdev);
-};
+}
+
/* Handles events from front-end */
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
void xen_pcibk_do_op(struct work_struct *data);
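Every inline wrapper above dispatches through the single const
xen_pcibk_backend pointer, so the backend is chosen exactly once at init
time. A minimal sketch of that selection, assuming (as in the real xenbus
code) a "passthrough" module parameter decides between the two
implementations:

	/* sketch: pick one backend at init; every pciback.h wrapper
	 * then dispatches through this const pointer */
	if (passthrough)
		xen_pcibk_backend = &xen_pcibk_passthrough_backend;
	else
		xen_pcibk_backend = &xen_pcibk_vpci_backend;
	pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name);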
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 8c95c3415b7..63616d7453e 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -10,7 +10,6 @@
#include <linux/sched.h>
#include "pciback.h"
-#define DRV_NAME "xen-pciback"
int verbose_request;
module_param(verbose_request, int, 0644);
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index 4a42cfb0959..46d140baebd 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -8,16 +8,15 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/pci.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include "pciback.h"
#define PCI_SLOT_MAX 32
-#define DRV_NAME "xen-pciback"
struct vpci_dev_data {
/* Access to dev_list must be protected by lock */
struct list_head dev_list[PCI_SLOT_MAX];
- spinlock_t lock;
+ struct mutex lock;
};
static inline struct list_head *list_first(struct list_head *head)
@@ -33,13 +32,12 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev_entry *entry;
struct pci_dev *dev = NULL;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
- unsigned long flags;
if (domain != 0 || bus != 0)
return NULL;
if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
- spin_lock_irqsave(&vpci_dev->lock, flags);
+ mutex_lock(&vpci_dev->lock);
list_for_each_entry(entry,
&vpci_dev->dev_list[PCI_SLOT(devfn)],
@@ -50,7 +48,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
}
}
- spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ mutex_unlock(&vpci_dev->lock);
}
return dev;
}
@@ -71,7 +69,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
int err = 0, slot, func = -1;
struct pci_dev_entry *t, *dev_entry;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
- unsigned long flags;
if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
err = -EFAULT;
@@ -90,7 +87,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
dev_entry->dev = dev;
- spin_lock_irqsave(&vpci_dev->lock, flags);
+ mutex_lock(&vpci_dev->lock);
/* Keep multi-function devices together on the virtual PCI bus */
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
@@ -129,7 +126,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
"No more space on root virtual PCI bus");
unlock:
- spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ mutex_unlock(&vpci_dev->lock);
/* Publish this device. */
if (!err)
@@ -145,14 +142,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
int slot;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
struct pci_dev *found_dev = NULL;
- unsigned long flags;
- spin_lock_irqsave(&vpci_dev->lock, flags);
+ mutex_lock(&vpci_dev->lock);
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
- struct pci_dev_entry *e, *tmp;
- list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
- list) {
+ struct pci_dev_entry *e;
+
+ list_for_each_entry(e, &vpci_dev->dev_list[slot], list) {
if (e->dev == dev) {
list_del(&e->list);
found_dev = e->dev;
@@ -163,7 +159,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
}
out:
- spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ mutex_unlock(&vpci_dev->lock);
if (found_dev)
pcistub_put_pci_dev(found_dev);
@@ -178,7 +174,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
if (!vpci_dev)
return -ENOMEM;
- spin_lock_init(&vpci_dev->lock);
+ mutex_init(&vpci_dev->lock);
for (slot = 0; slot < PCI_SLOT_MAX; slot++)
INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
@@ -222,10 +218,9 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
struct pci_dev_entry *entry;
struct pci_dev *dev = NULL;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
- unsigned long flags;
int found = 0, slot;
- spin_lock_irqsave(&vpci_dev->lock, flags);
+ mutex_lock(&vpci_dev->lock);
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
list_for_each_entry(entry,
&vpci_dev->dev_list[slot],
@@ -243,11 +238,11 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
}
}
}
- spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ mutex_unlock(&vpci_dev->lock);
return found;
}
-struct xen_pcibk_backend xen_pcibk_vpci_backend = {
+const struct xen_pcibk_backend xen_pcibk_vpci_backend = {
.name = "vpci",
.init = __xen_pcibk_init_devices,
.free = __xen_pcibk_release_devices,
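The spinlock-to-mutex conversion above relies on every taker of
vpci_dev->lock running in process context; with a mutex the critical
sections may also sleep if they ever need to. The pattern, shown on the
device-add path (a sketch, not a further hunk):

	/* before: interrupts disabled, no sleeping under the lock */
	spin_lock_irqsave(&vpci_dev->lock, flags);
	list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]);
	spin_unlock_irqrestore(&vpci_dev->lock, flags);

	/* after: process context only, blocking is now permitted */
	mutex_lock(&vpci_dev->lock);
	list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]);
	mutex_unlock(&vpci_dev->lock);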
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 978d2c6f5dc..474d52ec337 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -13,7 +13,6 @@
#include <asm/xen/pci.h>
#include "pciback.h"
-#define DRV_NAME "xen-pciback"
#define INVALID_EVTCHN_IRQ (-1)
struct workqueue_struct *xen_pcibk_wq;
@@ -44,7 +43,7 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
pdev->xdev = xdev;
dev_set_drvdata(&xdev->dev, pdev);
- spin_lock_init(&pdev->dev_lock);
+ mutex_init(&pdev->dev_lock);
pdev->sh_info = NULL;
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
@@ -62,14 +61,12 @@ out:
static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
{
- spin_lock(&pdev->dev_lock);
-
+ mutex_lock(&pdev->dev_lock);
/* Ensure the guest can't trigger our handler before removing devices */
if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
unbind_from_irqhandler(pdev->evtchn_irq, pdev);
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
}
- spin_unlock(&pdev->dev_lock);
/* If the driver domain started an op, make sure we complete it
* before releasing the shared memory */
@@ -77,13 +74,11 @@ static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
/* Note, the workqueue does not use spinlocks at all. */
flush_workqueue(xen_pcibk_wq);
- spin_lock(&pdev->dev_lock);
if (pdev->sh_info != NULL) {
xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
pdev->sh_info = NULL;
}
- spin_unlock(&pdev->dev_lock);
-
+ mutex_unlock(&pdev->dev_lock);
}
static void free_pdev(struct xen_pcibk_device *pdev)
@@ -120,9 +115,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
goto out;
}
- spin_lock(&pdev->dev_lock);
pdev->sh_info = vaddr;
- spin_unlock(&pdev->dev_lock);
err = bind_interdomain_evtchn_to_irqhandler(
pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
@@ -132,10 +125,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
"Error binding event channel to IRQ");
goto out;
}
-
- spin_lock(&pdev->dev_lock);
pdev->evtchn_irq = err;
- spin_unlock(&pdev->dev_lock);
err = 0;
dev_dbg(&pdev->xdev->dev, "Attached!\n");
@@ -150,6 +140,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
char *magic = NULL;
+ mutex_lock(&pdev->dev_lock);
/* Make sure we only do this setup once */
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
XenbusStateInitialised)
@@ -176,7 +167,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
xenbus_dev_fatal(pdev->xdev, -EFAULT,
"version mismatch (%s/%s) with pcifront - "
- "halting xen_pcibk",
+ "halting " DRV_NAME,
magic, XEN_PCI_MAGIC);
goto out;
}
@@ -194,6 +185,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
out:
+ mutex_unlock(&pdev->dev_lock);
kfree(magic);
@@ -369,6 +361,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
+ mutex_lock(&pdev->dev_lock);
/* Make sure we only reconfigure once */
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
XenbusStateReconfiguring)
@@ -506,6 +499,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
}
out:
+ mutex_unlock(&pdev->dev_lock);
return 0;
}
@@ -562,6 +556,7 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
char dev_str[64];
char state_str[64];
+ mutex_lock(&pdev->dev_lock);
/* It's possible we could get the call to setup twice, so make sure
* we're not already connected.
*/
@@ -642,10 +637,10 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
"Error switching to initialised state!");
out:
+ mutex_unlock(&pdev->dev_lock);
if (!err)
/* see if pcifront is already configured (if not, we'll wait) */
xen_pcibk_attach(pdev);
-
return err;
}
@@ -724,7 +719,7 @@ static struct xenbus_driver xenbus_xen_pcibk_driver = {
.otherend_changed = xen_pcibk_frontend_changed,
};
-struct xen_pcibk_backend *xen_pcibk_backend;
+const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
int __init xen_pcibk_xenbus_register(void)
{
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 6ea852e2516..d93c70857e0 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -68,6 +68,8 @@
*/
#include <linux/kernel.h>
+#include <linux/bootmem.h>
+#include <linux/swap.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/module.h>
@@ -93,6 +95,15 @@ static unsigned int selfballoon_uphysteresis __read_mostly = 1;
/* In HZ, controls frequency of worker invocation. */
static unsigned int selfballoon_interval __read_mostly = 5;
+/*
+ * Minimum usable RAM, in MB, enforced as a floor on the selfballooning target.
+ * If non-zero, it is added to totalreserve_pages and self-ballooning
+ * will not balloon below the sum. If zero, a piecewise linear function
+ * is calculated as a minimum and added to totalreserve_pages. Note that
+ * setting this value indiscriminately may cause OOMs and crashes.
+ */
+static unsigned int selfballoon_min_usable_mb;
+
static void selfballoon_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
@@ -189,20 +200,23 @@ static int __init xen_selfballooning_setup(char *s)
__setup("selfballooning", xen_selfballooning_setup);
#endif /* CONFIG_FRONTSWAP */
+#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
+
/*
* Use current balloon size, the goal (vm_committed_as), and hysteresis
* parameters to set a new target balloon size
*/
static void selfballoon_process(struct work_struct *work)
{
- unsigned long cur_pages, goal_pages, tgt_pages;
+ unsigned long cur_pages, goal_pages, tgt_pages, floor_pages;
+ unsigned long useful_pages;
bool reset_timer = false;
if (xen_selfballooning_enabled) {
- cur_pages = balloon_stats.current_pages;
+ cur_pages = totalram_pages;
tgt_pages = cur_pages; /* default is no change */
goal_pages = percpu_counter_read_positive(&vm_committed_as) +
- balloon_stats.current_pages - totalram_pages;
+ totalreserve_pages;
#ifdef CONFIG_FRONTSWAP
/* allow space for frontswap pages to be repatriated */
if (frontswap_selfshrinking && frontswap_enabled)
@@ -217,7 +231,26 @@ static void selfballoon_process(struct work_struct *work)
((goal_pages - cur_pages) /
selfballoon_uphysteresis);
/* else if cur_pages == goal_pages, no change */
- balloon_set_new_target(tgt_pages);
+ useful_pages = max_pfn - totalreserve_pages;
+ if (selfballoon_min_usable_mb != 0)
+ floor_pages = totalreserve_pages +
+ MB2PAGES(selfballoon_min_usable_mb);
+ /* piecewise linear function ending in ~3% slope */
+ else if (useful_pages < MB2PAGES(16))
+ floor_pages = max_pfn; /* not worth ballooning */
+ else if (useful_pages < MB2PAGES(64))
+ floor_pages = totalreserve_pages + MB2PAGES(16) +
+ ((useful_pages - MB2PAGES(16)) >> 1);
+ else if (useful_pages < MB2PAGES(512))
+ floor_pages = totalreserve_pages + MB2PAGES(40) +
+ ((useful_pages - MB2PAGES(40)) >> 3);
+ else /* useful_pages >= MB2PAGES(512) */
+ floor_pages = totalreserve_pages + MB2PAGES(99) +
+ ((useful_pages - MB2PAGES(99)) >> 5);
+ if (tgt_pages < floor_pages)
+ tgt_pages = floor_pages;
+ balloon_set_new_target(tgt_pages +
+ balloon_stats.current_pages - totalram_pages);
reset_timer = true;
}
#ifdef CONFIG_FRONTSWAP
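A worked example of the piecewise floor, with illustrative numbers: a guest
with 1024 MiB of RAM whose totalreserve_pages amounts to 64 MiB has
useful_pages = MB2PAGES(960), which lands in the final branch:

	floor_pages = totalreserve_pages + MB2PAGES(99)
			+ (MB2PAGES(960 - 99) >> 5)
		   ~= totalreserve_pages + MB2PAGES(126)

so no matter how low vm_committed_as drives goal_pages, the target is never
pushed below roughly the reserve plus 126 MiB (the >> 5 is the ~3% slope
mentioned in the comment).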
@@ -340,6 +373,31 @@ static ssize_t store_selfballoon_uphys(struct sys_device *dev,
static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
show_selfballoon_uphys, store_selfballoon_uphys);
+SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n",
+ selfballoon_min_usable_mb);
+
+static ssize_t store_selfballoon_min_usable_mb(struct sys_device *dev,
+ struct sysdev_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ err = strict_strtoul(buf, 10, &val);
+ if (err || val == 0)
+ return -EINVAL;
+ selfballoon_min_usable_mb = val;
+ return count;
+}
+
+static SYSDEV_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR,
+ show_selfballoon_min_usable_mb,
+ store_selfballoon_min_usable_mb);
+
+
#ifdef CONFIG_FRONTSWAP
SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
@@ -421,6 +479,7 @@ static struct attribute *selfballoon_attrs[] = {
&attr_selfballoon_interval.attr,
&attr_selfballoon_downhysteresis.attr,
&attr_selfballoon_uphysteresis.attr,
+ &attr_selfballoon_min_usable_mb.attr,
#ifdef CONFIG_FRONTSWAP
&attr_frontswap_selfshrinking.attr,
&attr_frontswap_hysteresis.attr,
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 090c61ee8fd..2eff7a6aaa2 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -212,7 +212,9 @@ int xb_init_comms(void)
printk(KERN_WARNING "XENBUS response ring is not quiescent "
"(%08x:%08x): fixing up\n",
intf->rsp_cons, intf->rsp_prod);
- intf->rsp_cons = intf->rsp_prod;
+ /* this fix-up would break kdump, so skip it there */
+ if (!reset_devices)
+ intf->rsp_cons = intf->rsp_prod;
}
if (xenbus_irq) {
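reset_devices is the stock boot parameter that kdump capture kernels are
loaded with, e.g. (a typical invocation, shown only for illustration):

	kexec -p /boot/vmlinuz --initrd=/boot/initrd.img \
		--append="... irqpoll maxcpus=1 reset_devices"

Setting rsp_cons to rsp_prod discards any responses the crashed kernel had
not yet consumed, hence the exemption for that case.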
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index bd2f90c9ac8..cef9b0bf63d 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -684,64 +684,74 @@ static int __init xenbus_probe_initcall(void)
device_initcall(xenbus_probe_initcall);
-static int __init xenbus_init(void)
+/* Set up event channel for xenstored which is run as a local process
+ * (this is normally used only in dom0)
+ */
+static int __init xenstored_local_init(void)
{
int err = 0;
unsigned long page = 0;
+ struct evtchn_alloc_unbound alloc_unbound;
- DPRINTK("");
+ /* Allocate Xenstore page */
+ page = get_zeroed_page(GFP_KERNEL);
+ if (!page)
+ goto out_err;
- err = -ENODEV;
- if (!xen_domain())
- return err;
+ xen_store_mfn = xen_start_info->store_mfn =
+ pfn_to_mfn(virt_to_phys((void *)page) >>
+ PAGE_SHIFT);
- /*
- * Domain0 doesn't have a store_evtchn or store_mfn yet.
- */
- if (xen_initial_domain()) {
- struct evtchn_alloc_unbound alloc_unbound;
+ /* Next allocate a local port which xenstored can bind to */
+ alloc_unbound.dom = DOMID_SELF;
+ alloc_unbound.remote_dom = DOMID_SELF;
- /* Allocate Xenstore page */
- page = get_zeroed_page(GFP_KERNEL);
- if (!page)
- goto out_error;
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+ &alloc_unbound);
+ if (err == -ENOSYS)
+ goto out_err;
- xen_store_mfn = xen_start_info->store_mfn =
- pfn_to_mfn(virt_to_phys((void *)page) >>
- PAGE_SHIFT);
+ BUG_ON(err);
+ xen_store_evtchn = xen_start_info->store_evtchn =
+ alloc_unbound.port;
- /* Next allocate a local port which xenstored can bind to */
- alloc_unbound.dom = DOMID_SELF;
- alloc_unbound.remote_dom = 0;
+ return 0;
- err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
- &alloc_unbound);
- if (err == -ENOSYS)
- goto out_error;
+ out_err:
+ if (page != 0)
+ free_page(page);
+ return err;
+}
- BUG_ON(err);
- xen_store_evtchn = xen_start_info->store_evtchn =
- alloc_unbound.port;
+static int __init xenbus_init(void)
+{
+ int err = 0;
- xen_store_interface = mfn_to_virt(xen_store_mfn);
+ if (!xen_domain())
+ return -ENODEV;
+
+ if (xen_hvm_domain()) {
+ uint64_t v = 0;
+ err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+ if (err)
+ goto out_error;
+ xen_store_evtchn = (int)v;
+ err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+ if (err)
+ goto out_error;
+ xen_store_mfn = (unsigned long)v;
+ xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
} else {
- if (xen_hvm_domain()) {
- uint64_t v = 0;
- err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
- if (err)
- goto out_error;
- xen_store_evtchn = (int)v;
- err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+ xen_store_evtchn = xen_start_info->store_evtchn;
+ xen_store_mfn = xen_start_info->store_mfn;
+ if (xen_store_evtchn)
+ xenstored_ready = 1;
+ else {
+ err = xenstored_local_init();
if (err)
goto out_error;
- xen_store_mfn = (unsigned long)v;
- xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
- } else {
- xen_store_evtchn = xen_start_info->store_evtchn;
- xen_store_mfn = xen_start_info->store_mfn;
- xen_store_interface = mfn_to_virt(xen_store_mfn);
- xenstored_ready = 1;
}
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
}
/* Initialize the interface to xenstore. */
@@ -760,12 +770,7 @@ static int __init xenbus_init(void)
proc_mkdir("xen", NULL);
#endif
- return 0;
-
- out_error:
- if (page != 0)
- free_page(page);
-
+ out_error:
return err;
}
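After this refactor xenbus_init() resolves the xenstore channel in one of
three ways. Condensed (a sketch of the control flow above, not further
code):

	if (xen_hvm_domain()) {
		/* ring PFN and event channel come from HVM params;
		 * the page is foreign, so it is mapped via ioremap() */
	} else if (xen_start_info->store_evtchn) {
		/* PV guest: the toolstack filled in start_info and
		 * xenstored is already running, so mark it ready */
	} else {
		/* dom0 running xenstored locally: allocate the page
		 * and an unbound port (xenstored_local_init()) */
	}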
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 60adf919d78..32417b5064f 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -104,8 +104,6 @@ static int xenbus_uevent_backend(struct device *dev,
xdev = to_xenbus_device(dev);
bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
- if (xdev == NULL)
- return -ENODEV;
if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
return -ENOMEM;
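The check removed above was dead code: xdev is dereferenced one line
earlier, so a NULL pointer would already have oopsed. The anti-pattern in
isolation:

	xdev = to_xenbus_device(dev);
	bus = container_of(xdev->dev.bus,
			   struct xen_bus_type, bus);	/* dereferences xdev */
	if (xdev == NULL)	/* too late to ever help */
		return -ENODEV;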
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index ed2ba474a56..540587e18a9 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -248,10 +248,131 @@ int __xenbus_register_frontend(struct xenbus_driver *drv,
}
EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+static int backend_state;
+
+static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
+ const char **v, unsigned int l)
+{
+ xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state);
+ printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+ v[XS_WATCH_PATH], xenbus_strstate(backend_state));
+ wake_up(&backend_state_wq);
+}
+
+static void xenbus_reset_wait_for_backend(char *be, int expected)
+{
+ long timeout;
+ timeout = wait_event_interruptible_timeout(backend_state_wq,
+ backend_state == expected, 5 * HZ);
+ if (timeout <= 0)
+ printk(KERN_INFO "XENBUS: backend %s timed out.\n", be);
+}
+
+/*
+ * Reset frontend if it is in Connected or Closed state.
+ * Wait for backend to catch up.
+ * State Connected happens during kdump, Closed after kexec.
+ */
+static void xenbus_reset_frontend(char *fe, char *be, int be_state)
+{
+ struct xenbus_watch be_watch;
+
+ printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+ be, xenbus_strstate(be_state));
+
+ memset(&be_watch, 0, sizeof(be_watch));
+ be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
+ if (!be_watch.node)
+ return;
+
+ be_watch.callback = xenbus_reset_backend_state_changed;
+ backend_state = XenbusStateUnknown;
+
+ printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
+ register_xenbus_watch(&be_watch);
+
+ /* fall through each case, walking the backend forward to XenbusStateInitialising */
+ switch (be_state) {
+ case XenbusStateConnected:
+ xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
+ xenbus_reset_wait_for_backend(be, XenbusStateClosing);
+
+ case XenbusStateClosing:
+ xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
+ xenbus_reset_wait_for_backend(be, XenbusStateClosed);
+
+ case XenbusStateClosed:
+ xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
+ xenbus_reset_wait_for_backend(be, XenbusStateInitWait);
+ }
+
+ unregister_xenbus_watch(&be_watch);
+ printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
+ kfree(be_watch.node);
+}
+
+static void xenbus_check_frontend(char *class, char *dev)
+{
+ int be_state, fe_state, err;
+ char *backend, *frontend;
+
+ frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
+ if (!frontend)
+ return;
+
+ err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state);
+ if (err != 1)
+ goto out;
+
+ switch (fe_state) {
+ case XenbusStateConnected:
+ case XenbusStateClosed:
+ printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
+ frontend, xenbus_strstate(fe_state));
+ backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
+ if (!backend || IS_ERR(backend))
+ goto out;
+ err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
+ if (err == 1)
+ xenbus_reset_frontend(frontend, backend, be_state);
+ kfree(backend);
+ break;
+ default:
+ break;
+ }
+out:
+ kfree(frontend);
+}
+
+static void xenbus_reset_state(void)
+{
+ char **devclass, **dev;
+ int devclass_n, dev_n;
+ int i, j;
+
+ devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n);
+ if (IS_ERR(devclass))
+ return;
+
+ for (i = 0; i < devclass_n; i++) {
+ dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n);
+ if (IS_ERR(dev))
+ continue;
+ for (j = 0; j < dev_n; j++)
+ xenbus_check_frontend(devclass[i], dev[j]);
+ kfree(dev);
+ }
+ kfree(devclass);
+}
+
static int frontend_probe_and_watch(struct notifier_block *notifier,
unsigned long event,
void *data)
{
+ /* reset devices in Connected or Closed state */
+ if (xen_hvm_domain())
+ xenbus_reset_state();
/* Enumerate devices in xenstore and watch for changes. */
xenbus_probe_devices(&xenbus_frontend);
register_xenbus_watch(&fe_watch);
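The deliberate fall-through in xenbus_reset_frontend() walks a stale
frontend down the normal close path, one xenbus_printf() on its state node
per step, waiting up to 5 seconds for the backend watch to confirm each
transition. For a device found Connected (the kdump case):

	frontend:  Connected -> Closing -> Closed -> Initialising
	backend:              Closing     Closed     InitWait

A device found Closed (the kexec case) enters the same walk at its last
step.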
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 5534690075a..b3b8f2f3ad1 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -45,6 +45,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <xen/xenbus.h>
+#include <xen/xen.h>
#include "xenbus_comms.h"
struct xs_stored_msg {
@@ -620,6 +621,15 @@ static struct xenbus_watch *find_watch(const char *token)
return NULL;
}
+static void xs_reset_watches(void)
+{
+ int err;
+
+ err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
+ if (err && err != -EEXIST)
+ printk(KERN_WARNING "xs_reset_watches failed: %d\n", err);
+}
+
/* Register callback to watch this node. */
int register_xenbus_watch(struct xenbus_watch *watch)
{
@@ -638,8 +648,7 @@ int register_xenbus_watch(struct xenbus_watch *watch)
err = xs_watch(watch->node, token);
- /* Ignore errors due to multiple registration. */
- if ((err != 0) && (err != -EEXIST)) {
+ if (err) {
spin_lock(&watches_lock);
list_del(&watch->list);
spin_unlock(&watches_lock);
@@ -897,5 +906,9 @@ int xs_init(void)
if (IS_ERR(task))
return PTR_ERR(task);
+ /* shut down watches left over from a kexec boot */
+ if (xen_hvm_domain())
+ xs_reset_watches();
+
return 0;
}
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index 76f7538bb33..d29c153705b 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -25,8 +25,9 @@ extern struct balloon_stats balloon_stats;
void balloon_set_new_target(unsigned long target);
-int alloc_xenballooned_pages(int nr_pages, struct page** pages);
-void free_xenballooned_pages(int nr_pages, struct page** pages);
+int alloc_xenballooned_pages(int nr_pages, struct page **pages,
+ bool highmem);
+void free_xenballooned_pages(int nr_pages, struct page **pages);
struct sys_device;
#ifdef CONFIG_XEN_SELFBALLOONING
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index b1fab6b5b3e..6b99bfbd785 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -156,6 +156,7 @@ unsigned int gnttab_max_grant_frames(void);
#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count);
int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
struct page **pages, unsigned int count);
diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
index 99fcffb372d..f0b6890370b 100644
--- a/include/xen/interface/io/xs_wire.h
+++ b/include/xen/interface/io/xs_wire.h
@@ -26,7 +26,11 @@ enum xsd_sockmsg_type
XS_SET_PERMS,
XS_WATCH_EVENT,
XS_ERROR,
- XS_IS_DOMAIN_INTRODUCED
+ XS_IS_DOMAIN_INTRODUCED,
+ XS_RESUME,
+ XS_SET_TARGET,
+ XS_RESTRICT,
+ XS_RESET_WATCHES
};
#define XS_WRITE_NONE "NONE"
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index 534cac89a77..c1080d9c705 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -109,6 +109,7 @@ struct physdev_irq {
#define MAP_PIRQ_TYPE_MSI 0x0
#define MAP_PIRQ_TYPE_GSI 0x1
#define MAP_PIRQ_TYPE_UNKNOWN 0x2
+#define MAP_PIRQ_TYPE_MSI_SEG 0x3
#define PHYSDEVOP_map_pirq 13
struct physdev_map_pirq {
@@ -119,7 +120,7 @@ struct physdev_map_pirq {
int index;
/* IN or OUT */
int pirq;
- /* IN */
+ /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
int bus;
/* IN */
int devfn;
@@ -198,6 +199,37 @@ struct physdev_get_free_pirq {
uint32_t pirq;
};
+#define XEN_PCI_DEV_EXTFN 0x1
+#define XEN_PCI_DEV_VIRTFN 0x2
+#define XEN_PCI_DEV_PXM 0x4
+
+#define PHYSDEVOP_pci_device_add 25
+struct physdev_pci_device_add {
+ /* IN */
+ uint16_t seg;
+ uint8_t bus;
+ uint8_t devfn;
+ uint32_t flags;
+ struct {
+ uint8_t bus;
+ uint8_t devfn;
+ } physfn;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ uint32_t optarr[];
+#elif defined(__GNUC__)
+ uint32_t optarr[0];
+#endif
+};
+
+#define PHYSDEVOP_pci_device_remove 26
+#define PHYSDEVOP_restore_msi_ext 27
+struct physdev_pci_device {
+ /* IN */
+ uint16_t seg;
+ uint8_t bus;
+ uint8_t devfn;
+};
+
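A minimal sketch of driving the new hypercall numbers: a hypothetical dom0
helper reporting a hot-added device to Xen (the in-tree caller, per the
diffstat, lives in drivers/xen/pci.c; names and error handling here are
illustrative):

	static int report_device_to_xen(uint16_t seg, uint8_t bus,
					uint8_t devfn)
	{
		struct physdev_pci_device_add add = {
			.seg = seg,
			.bus = bus,
			.devfn = devfn,
			.flags = 0,	/* no EXTFN/VIRTFN/PXM extras */
		};

		return HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add,
					     &add);
	}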
/*
* Notify that some PIRQ-bound event channels have been unmasked.
* ** This command is obsolete since interface version 0x00030202 and is **
diff --git a/include/xen/page.h b/include/xen/page.h
index 0be36b976f4..12765b6f951 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -3,6 +3,16 @@
#include <asm/xen/page.h>
-extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+struct xen_memory_region {
+ phys_addr_t start;
+ phys_addr_t size;
+};
+
+#define XEN_EXTRA_MEM_MAX_REGIONS 128 /* == E820MAX */
+
+extern __initdata
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS];
+
+extern unsigned long xen_released_pages;
#endif /* _XEN_PAGE_H */
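A sketch of consuming the new multi-region array; illustrative only (the
real consumer, per the diffstat, is arch/x86/xen/setup.c), and since the
array is __initdata any such walk must itself run from __init code. Unused
slots are assumed to have size == 0:

	int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		if (xen_extra_mem[i].size == 0)
			continue;	/* unused slot */
		pr_info("extra RAM region %d: %#llx-%#llx\n", i,
			(unsigned long long)xen_extra_mem[i].start,
			(unsigned long long)(xen_extra_mem[i].start +
					     xen_extra_mem[i].size));
	}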