summaryrefslogtreecommitdiffstats
path: root/arch/x86/xen
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/enlighten.c9
-rw-r--r--arch/x86/xen/mmu.c14
-rw-r--r--arch/x86/xen/p2m.c9
-rw-r--r--arch/x86/xen/setup.c283
-rw-r--r--arch/x86/xen/smp.c15
-rw-r--r--arch/x86/xen/time.c5
-rw-r--r--arch/x86/xen/xen-asm_32.S8
8 files changed, 176 insertions, 169 deletions
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3326204e251..add2c2d729c 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,7 +15,7 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
grant-table.o suspend.o platform-pci-unplug.o \
p2m.o
-obj-$(CONFIG_FTRACE) += trace.o
+obj-$(CONFIG_EVENT_TRACING) += trace.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 974a528458a..da8afd576a6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type);
unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
EXPORT_SYMBOL(machine_to_phys_mapping);
-unsigned int machine_to_phys_order;
-EXPORT_SYMBOL(machine_to_phys_order);
+unsigned long machine_to_phys_nr;
+EXPORT_SYMBOL(machine_to_phys_nr);
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
@@ -251,6 +251,7 @@ static void __init xen_init_cpuid_mask(void)
~((1 << X86_FEATURE_APIC) | /* disable local APIC */
(1 << X86_FEATURE_ACPI)); /* disable ACPI */
ax = 1;
+ cx = 0;
xen_cpuid(&ax, &bx, &cx, &dx);
xsave_mask =
@@ -951,6 +952,10 @@ static const struct pv_info xen_info __initconst = {
.paravirt_enabled = 1,
.shared_kernel_pmd = 0,
+#ifdef CONFIG_X86_64
+ .extra_user_64bit_cs = FLAT_USER_CS64,
+#endif
+
.name = "Xen",
};
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 4df0444b2ce..87f6673b120 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1678,15 +1678,17 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
void __init xen_setup_machphys_mapping(void)
{
struct xen_machphys_mapping mapping;
- unsigned long machine_to_phys_nr_ents;
if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
machine_to_phys_mapping = (unsigned long *)mapping.v_start;
- machine_to_phys_nr_ents = mapping.max_mfn + 1;
+ machine_to_phys_nr = mapping.max_mfn + 1;
} else {
- machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+ machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
}
- machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+#ifdef CONFIG_X86_32
+ WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
+ < machine_to_phys_mapping);
+#endif
}
#ifdef CONFIG_X86_64
@@ -1881,6 +1883,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
# endif
#else
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+ case VVAR_PAGE:
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
@@ -1921,7 +1924,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#ifdef CONFIG_X86_64
/* Replicate changes to map the vsyscall page into the user
pagetable vsyscall mapping. */
- if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+ if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
+ idx == VVAR_PAGE) {
unsigned long vaddr = __fix_to_virt(idx);
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
}
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index a8ee9a45c35..1b267e75158 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -845,13 +845,16 @@ EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
static int p2m_dump_show(struct seq_file *m, void *v)
{
static const char * const level_name[] = { "top", "middle",
- "entry", "abnormal" };
- static const char * const type_name[] = { "identity", "missing",
- "pfn", "abnormal"};
+ "entry", "abnormal", "error"};
#define TYPE_IDENTITY 0
#define TYPE_MISSING 1
#define TYPE_PFN 2
#define TYPE_UNKNOWN 3
+ static const char * const type_name[] = {
+ [TYPE_IDENTITY] = "identity",
+ [TYPE_MISSING] = "missing",
+ [TYPE_PFN] = "pfn",
+ [TYPE_UNKNOWN] = "abnormal"};
unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
unsigned int uninitialized_var(prev_level);
unsigned int uninitialized_var(prev_type);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index df118a825f3..38d0af4fefe 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -37,7 +37,10 @@ extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);
/* Amount of extra memory space we add to the e820 ranges */
-phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
+/* Number of pages released from the initial allocation. */
+unsigned long xen_released_pages;
/*
* The maximum amount of extra memory compared to the base size. The
@@ -51,48 +54,47 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
*/
#define EXTRA_MEM_RATIO (10)
-static void __init xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(u64 start, u64 size)
{
unsigned long pfn;
+ int i;
- u64 size = (u64)pages * PAGE_SIZE;
- u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
-
- if (!pages)
- return;
-
- e820_add_region(extra_start, size, E820_RAM);
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
- memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
+ for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+ /* Add new region. */
+ if (xen_extra_mem[i].size == 0) {
+ xen_extra_mem[i].start = start;
+ xen_extra_mem[i].size = size;
+ break;
+ }
+ /* Append to existing region. */
+ if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
+ xen_extra_mem[i].size += size;
+ break;
+ }
+ }
+ if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+ printk(KERN_WARNING "Warning: not enough extra memory regions\n");
- xen_extra_mem_size += size;
+ memblock_x86_reserve_range(start, start + size, "XEN EXTRA");
- xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+ xen_max_p2m_pfn = PFN_DOWN(start + size);
- for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+ for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
-static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
- phys_addr_t end_addr)
+static unsigned long __init xen_release_chunk(unsigned long start,
+ unsigned long end)
{
struct xen_memory_reservation reservation = {
.address_bits = 0,
.extent_order = 0,
.domid = DOMID_SELF
};
- unsigned long start, end;
unsigned long len = 0;
unsigned long pfn;
int ret;
- start = PFN_UP(start_addr);
- end = PFN_DOWN(end_addr);
-
- if (end <= start)
- return 0;
-
for(pfn = start; pfn < end; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
@@ -117,88 +119,92 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
return len;
}
-static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
- const struct e820map *e820)
+static unsigned long __init xen_set_identity_and_release(
+ const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
- phys_addr_t max_addr = PFN_PHYS(max_pfn);
- phys_addr_t last_end = ISA_END_ADDRESS;
+ phys_addr_t start = 0;
unsigned long released = 0;
+ unsigned long identity = 0;
+ const struct e820entry *entry;
int i;
- /* Free any unused memory above the low 1Mbyte. */
- for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
- phys_addr_t end = e820->map[i].addr;
- end = min(max_addr, end);
+ /*
+ * Combine non-RAM regions and gaps until a RAM region (or the
+ * end of the map) is reached, then set the 1:1 map and
+ * release the pages (if available) in those non-RAM regions.
+ *
+ * The combined non-RAM regions are rounded to a whole number
+ * of pages so any partial pages are accessible via the 1:1
+ * mapping. This is needed for some BIOSes that put (for
+ * example) the DMI tables in a reserved region that begins on
+ * a non-page boundary.
+ */
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+ phys_addr_t end = entry->addr + entry->size;
+
+ if (entry->type == E820_RAM || i == map_size - 1) {
+ unsigned long start_pfn = PFN_DOWN(start);
+ unsigned long end_pfn = PFN_UP(end);
- if (last_end < end)
- released += xen_release_chunk(last_end, end);
- last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
+ if (entry->type == E820_RAM)
+ end_pfn = PFN_UP(entry->addr);
+
+ if (start_pfn < end_pfn) {
+ if (start_pfn < nr_pages)
+ released += xen_release_chunk(
+ start_pfn, min(end_pfn, nr_pages));
+
+ identity += set_phys_range_identity(
+ start_pfn, end_pfn);
+ }
+ start = end;
+ }
}
- if (last_end < max_addr)
- released += xen_release_chunk(last_end, max_addr);
+ printk(KERN_INFO "Released %lu pages of unused memory\n", released);
+ printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
- printk(KERN_INFO "released %lu pages of unused memory\n", released);
return released;
}
-static unsigned long __init xen_set_identity(const struct e820entry *list,
- ssize_t map_size)
+static unsigned long __init xen_get_max_pages(void)
{
- phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
- phys_addr_t start_pci = last;
- const struct e820entry *entry;
- unsigned long identity = 0;
- int i;
-
- for (i = 0, entry = list; i < map_size; i++, entry++) {
- phys_addr_t start = entry->addr;
- phys_addr_t end = start + entry->size;
-
- if (start < last)
- start = last;
-
- if (end <= start)
- continue;
+ unsigned long max_pages = MAX_DOMAIN_PAGES;
+ domid_t domid = DOMID_SELF;
+ int ret;
- /* Skip over the 1MB region. */
- if (last > end)
- continue;
+ ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
+ if (ret > 0)
+ max_pages = ret;
+ return min(max_pages, MAX_DOMAIN_PAGES);
+}
- if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
- if (start > start_pci)
- identity += set_phys_range_identity(
- PFN_UP(start_pci), PFN_DOWN(start));
+static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
+{
+ u64 end = start + size;
- /* Without saving 'last' we would gooble RAM too
- * at the end of the loop. */
- last = end;
- start_pci = end;
- continue;
- }
- start_pci = min(start, start_pci);
- last = end;
+ /* Align RAM regions to page boundaries. */
+ if (type == E820_RAM) {
+ start = PAGE_ALIGN(start);
+ end &= ~((u64)PAGE_SIZE - 1);
}
- if (last > start_pci)
- identity += set_phys_range_identity(
- PFN_UP(start_pci), PFN_DOWN(last));
- return identity;
+
+ e820_add_region(start, end - start, type);
}
+
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
char * __init xen_memory_setup(void)
{
static struct e820entry map[E820MAX] __initdata;
- static struct e820entry map_raw[E820MAX] __initdata;
unsigned long max_pfn = xen_start_info->nr_pages;
unsigned long long mem_end;
int rc;
struct xen_memory_map memmap;
+ unsigned long max_pages;
unsigned long extra_pages = 0;
- unsigned long extra_limit;
- unsigned long identity_pages = 0;
int i;
int op;
@@ -224,58 +230,65 @@ char * __init xen_memory_setup(void)
}
BUG_ON(rc);
- memcpy(map_raw, map, sizeof(map));
- e820.nr_map = 0;
- xen_extra_mem_start = mem_end;
- for (i = 0; i < memmap.nr_entries; i++) {
- unsigned long long end;
-
- /* Guard against non-page aligned E820 entries. */
- if (map[i].type == E820_RAM)
- map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
-
- end = map[i].addr + map[i].size;
- if (map[i].type == E820_RAM && end > mem_end) {
- /* RAM off the end - may be partially included */
- u64 delta = min(map[i].size, end - mem_end);
-
- map[i].size -= delta;
- end -= delta;
-
- extra_pages += PFN_DOWN(delta);
- /*
- * Set RAM below 4GB that is not for us to be unusable.
- * This prevents "System RAM" address space from being
- * used as potential resource for I/O address (happens
- * when 'allocate_resource' is called).
- */
- if (delta &&
- (xen_initial_domain() && end < 0x100000000ULL))
- e820_add_region(end, delta, E820_UNUSABLE);
+ /* Make sure the Xen-supplied memory map is well-ordered. */
+ sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
+
+ max_pages = xen_get_max_pages();
+ if (max_pages > max_pfn)
+ extra_pages += max_pages - max_pfn;
+
+ /*
+ * Set P2M for all non-RAM pages and E820 gaps to be identity
+ * type PFNs. Any RAM pages that would be made inaccesible by
+ * this are first released.
+ */
+ xen_released_pages = xen_set_identity_and_release(
+ map, memmap.nr_entries, max_pfn);
+ extra_pages += xen_released_pages;
+
+ /*
+ * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+ * factor the base size. On non-highmem systems, the base
+ * size is the full initial memory allocation; on highmem it
+ * is limited to the max size of lowmem, so that it doesn't
+ * get completely filled.
+ *
+ * In principle there could be a problem in lowmem systems if
+ * the initial memory is also very large with respect to
+ * lowmem, but we won't try to deal with that here.
+ */
+ extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+ extra_pages);
+
+ i = 0;
+ while (i < memmap.nr_entries) {
+ u64 addr = map[i].addr;
+ u64 size = map[i].size;
+ u32 type = map[i].type;
+
+ if (type == E820_RAM) {
+ if (addr < mem_end) {
+ size = min(size, mem_end - addr);
+ } else if (extra_pages) {
+ size = min(size, (u64)extra_pages * PAGE_SIZE);
+ extra_pages -= size / PAGE_SIZE;
+ xen_add_extra_mem(addr, size);
+ } else
+ type = E820_UNUSABLE;
}
- if (map[i].size > 0 && end > xen_extra_mem_start)
- xen_extra_mem_start = end;
+ xen_align_and_add_e820_region(addr, size, type);
- /* Add region if any remains */
- if (map[i].size > 0)
- e820_add_region(map[i].addr, map[i].size, map[i].type);
+ map[i].addr += size;
+ map[i].size -= size;
+ if (map[i].size == 0)
+ i++;
}
- /* Align the balloon area so that max_low_pfn does not get set
- * to be at the _end_ of the PCI gap at the far end (fee01000).
- * Note that xen_extra_mem_start gets set in the loop above to be
- * past the last E820 region. */
- if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
- xen_extra_mem_start = (1ULL<<32);
/*
* In domU, the ISA region is normal, usable memory, but we
* reserve ISA memory anyway because too many things poke
* about in there.
- *
- * In Dom0, the host E820 information can leave gaps in the
- * ISA range, which would cause us to release those pages. To
- * avoid this, we unconditionally reserve them here.
*/
e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
E820_RESERVED);
@@ -292,36 +305,6 @@ char * __init xen_memory_setup(void)
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
-
- /*
- * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
- * factor the base size. On non-highmem systems, the base
- * size is the full initial memory allocation; on highmem it
- * is limited to the max size of lowmem, so that it doesn't
- * get completely filled.
- *
- * In principle there could be a problem in lowmem systems if
- * the initial memory is also very large with respect to
- * lowmem, but we won't try to deal with that here.
- */
- extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
- max_pfn + extra_pages);
-
- if (extra_limit >= max_pfn)
- extra_pages = extra_limit - max_pfn;
- else
- extra_pages = 0;
-
- xen_add_extra_mem(extra_pages);
-
- /*
- * Set P2M for all non-RAM pages and E820 gaps to be identity
- * type PFNs. We supply it with the non-sanitized version
- * of the E820.
- */
- identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
- printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
return "Xen";
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b4533a86d7e..041d4fe9dfe 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -32,6 +32,7 @@
#include <xen/page.h>
#include <xen/events.h>
+#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"
@@ -207,6 +208,15 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
unsigned cpu;
unsigned int i;
+ if (skip_ioapic_setup) {
+ char *m = (max_cpus == 0) ?
+ "The nosmp parameter is incompatible with Xen; " \
+ "use Xen dom0_max_vcpus=1 parameter" :
+ "The noapic parameter is incompatible with Xen";
+
+ xen_raw_printk(m);
+ panic(m);
+ }
xen_init_lock_cpu(0);
smp_store_cpu_info(0);
@@ -521,10 +531,7 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
native_smp_prepare_cpus(max_cpus);
WARN_ON(xen_smp_intr_init(0));
- if (!xen_have_vector_callback)
- return;
xen_init_lock_cpu(0);
- xen_init_spinlocks();
}
static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
@@ -546,6 +553,8 @@ static void xen_hvm_cpu_die(unsigned int cpu)
void __init xen_hvm_smp_init(void)
{
+ if (!xen_have_vector_callback)
+ return;
smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
smp_ops.cpu_up = xen_hvm_cpu_up;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 5158c505bef..163b4679556 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -168,9 +168,10 @@ cycle_t xen_clocksource_read(void)
struct pvclock_vcpu_time_info *src;
cycle_t ret;
- src = &get_cpu_var(xen_vcpu)->time;
+ preempt_disable_notrace();
+ src = &__get_cpu_var(xen_vcpu)->time;
ret = pvclock_clocksource_read(src);
- put_cpu_var(xen_vcpu);
+ preempt_enable_notrace();
return ret;
}
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 22a2093b586..b040b0e518c 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -113,11 +113,13 @@ xen_iret_start_crit:
/*
* If there's something pending, mask events again so we can
- * jump back into xen_hypervisor_callback
+ * jump back into xen_hypervisor_callback. Otherwise do not
+ * touch XEN_vcpu_info_mask.
*/
- sete XEN_vcpu_info_mask(%eax)
+ jne 1f
+ movb $1, XEN_vcpu_info_mask(%eax)
- popl %eax
+1: popl %eax
/*
* From this point on the registers are restored and the stack