summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-10-21 14:23:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-10-21 14:23:48 -0700
commit157b6ceb13e4b4148ee03dd517dbe88748943125 (patch)
treef0f0f50bef7ce0be2e86465881c518fbb5ddb0de /arch/x86/kernel
parent4a60cfa9457749f7987fd4f3c956dbba5a281129 (diff)
parent6e9636693373d938aa3b13427be3d212f172ac06 (diff)
Merge branch 'x86-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: x86, iommu: Update header comments with appropriate naming ia64, iommu: Add a dummy iommu_table.h file in IA64. x86, iommu: Fix IOMMU_INIT alignment rules x86, doc: Adding comments about .iommu_table and its neighbors. x86, iommu: Utilize the IOMMU_INIT macros functionality. x86, VT-d: Make Intel VT-d IOMMU use IOMMU_INIT_* macros. x86, GART/AMD-VI: Make AMD GART and IOMMU use IOMMU_INIT_* macros. x86, calgary: Make Calgary IOMMU use IOMMU_INIT_* macros. x86, xen-swiotlb: Make Xen-SWIOTLB use IOMMU_INIT_* macros. x86, swiotlb: Make SWIOTLB use IOMMU_INIT_* macros. x86, swiotlb: Simplify SWIOTLB pci_swiotlb_detect routine. x86, iommu: Add proper dependency sort routine (and sanity check). x86, iommu: Make all IOMMU's detection routines return a value. x86, iommu: Add IOMMU_INIT macros, .iommu_table section, and iommu_table_entry structure
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/amd_iommu_init.c15
-rw-r--r--arch/x86/kernel/aperture_64.c11
-rw-r--r--arch/x86/kernel/pci-calgary_64.c18
-rw-r--r--arch/x86/kernel/pci-dma.c44
-rw-r--r--arch/x86/kernel/pci-gart_64.c2
-rw-r--r--arch/x86/kernel/pci-iommu_table.c89
-rw-r--r--arch/x86/kernel/pci-swiotlb.c44
-rw-r--r--arch/x86/kernel/vmlinux.lds.S28
9 files changed, 207 insertions, 45 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 80a93dc9907..2c833d8c414 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -45,6 +45,7 @@ obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o io_delay.o rtc.o
+obj-y += pci-iommu_table.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3cb482e123d..6e11c813415 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -31,7 +31,7 @@
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/x86_init.h>
-
+#include <asm/iommu_table.h>
/*
* definitions for the ACPI scanning code
*/
@@ -1499,13 +1499,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
return 0;
}
-void __init amd_iommu_detect(void)
+int __init amd_iommu_detect(void)
{
if (no_iommu || (iommu_detected && !gart_iommu_aperture))
- return;
+ return -ENODEV;
if (amd_iommu_disabled)
- return;
+ return -ENODEV;
if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
iommu_detected = 1;
@@ -1514,7 +1514,9 @@ void __init amd_iommu_detect(void)
/* Make sure ACS will be enabled */
pci_request_acs();
+ return 1;
}
+ return -ENODEV;
}
/****************************************************************************
@@ -1545,3 +1547,8 @@ static int __init parse_amd_iommu_options(char *str)
__setup("amd_iommu_dump", parse_amd_iommu_dump);
__setup("amd_iommu=", parse_amd_iommu_options);
+
+IOMMU_INIT_FINISH(amd_iommu_detect,
+ gart_iommu_hole_init,
+ 0,
+ 0);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 377f5db3b8b..b3a16e8f070 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -371,7 +371,7 @@ void __init early_gart_iommu_check(void)
static int __initdata printed_gart_size_msg;
-void __init gart_iommu_hole_init(void)
+int __init gart_iommu_hole_init(void)
{
u32 agp_aper_base = 0, agp_aper_order = 0;
u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
@@ -381,7 +381,7 @@ void __init gart_iommu_hole_init(void)
if (gart_iommu_aperture_disabled || !fix_aperture ||
!early_pci_allowed())
- return;
+ return -ENODEV;
printk(KERN_INFO "Checking aperture...\n");
@@ -463,8 +463,9 @@ out:
unsigned long n = (32 * 1024 * 1024) << last_aper_order;
insert_aperture_resource((u32)last_aper_base, n);
+ return 1;
}
- return;
+ return 0;
}
if (!fallback_aper_force) {
@@ -500,7 +501,7 @@ out:
panic("Not enough memory for aperture");
}
} else {
- return;
+ return 0;
}
/* Fix up the north bridges */
@@ -526,4 +527,6 @@ out:
}
set_up_gart_resume(aper_order, aper_alloc);
+
+ return 1;
}
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 078d4ec1a9d..f56a117cef6 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -47,6 +47,7 @@
#include <asm/rio.h>
#include <asm/bios_ebda.h>
#include <asm/x86_init.h>
+#include <asm/iommu_table.h>
#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
int use_calgary __read_mostly = 1;
@@ -1364,7 +1365,7 @@ static int __init calgary_iommu_init(void)
return 0;
}
-void __init detect_calgary(void)
+int __init detect_calgary(void)
{
int bus;
void *tbl;
@@ -1378,13 +1379,13 @@ void __init detect_calgary(void)
* another HW IOMMU already, bail out.
*/
if (no_iommu || iommu_detected)
- return;
+ return -ENODEV;
if (!use_calgary)
- return;
+ return -ENODEV;
if (!early_pci_allowed())
- return;
+ return -ENODEV;
printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n");
@@ -1410,13 +1411,13 @@ void __init detect_calgary(void)
if (!rio_table_hdr) {
printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table "
"in EBDA - bailing!\n");
- return;
+ return -ENODEV;
}
ret = build_detail_arrays();
if (ret) {
printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret);
- return;
+ return -ENOMEM;
}
specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
@@ -1464,7 +1465,7 @@ void __init detect_calgary(void)
x86_init.iommu.iommu_init = calgary_iommu_init;
}
- return;
+ return calgary_found;
cleanup:
for (--bus; bus >= 0; --bus) {
@@ -1473,6 +1474,7 @@ cleanup:
if (info->tce_space)
free_tce_table(info->tce_space);
}
+ return -ENOMEM;
}
static int __init calgary_parse_options(char *p)
@@ -1594,3 +1596,5 @@ static int __init calgary_fixup_tce_spaces(void)
* and before device_initcall.
*/
rootfs_initcall(calgary_fixup_tce_spaces);
+
+IOMMU_INIT_POST(detect_calgary);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 9f07cfcbd3a..9ea999a4dcc 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -11,9 +11,8 @@
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/calgary.h>
-#include <asm/amd_iommu.h>
#include <asm/x86_init.h>
-#include <asm/xen/swiotlb-xen.h>
+#include <asm/iommu_table.h>
static int forbid_dac __read_mostly;
@@ -45,6 +44,8 @@ int iommu_detected __read_mostly = 0;
*/
int iommu_pass_through __read_mostly;
+extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
+
/* Dummy device used for NULL arguments (normally ISA). */
struct device x86_dma_fallback_dev = {
.init_name = "fallback device",
@@ -130,26 +131,24 @@ static void __init dma32_free_bootmem(void)
void __init pci_iommu_alloc(void)
{
+ struct iommu_table_entry *p;
+
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
- if (pci_xen_swiotlb_detect() || pci_swiotlb_detect())
- goto out;
-
- gart_iommu_hole_init();
-
- detect_calgary();
-
- detect_intel_iommu();
+ sort_iommu_table(__iommu_table, __iommu_table_end);
+ check_iommu_entries(__iommu_table, __iommu_table_end);
- /* needs to be called after gart_iommu_hole_init */
- amd_iommu_detect();
-out:
- pci_xen_swiotlb_init();
-
- pci_swiotlb_init();
+ for (p = __iommu_table; p < __iommu_table_end; p++) {
+ if (p && p->detect && p->detect() > 0) {
+ p->flags |= IOMMU_DETECTED;
+ if (p->early_init)
+ p->early_init();
+ if (p->flags & IOMMU_FINISH_IF_DETECTED)
+ break;
+ }
+ }
}
-
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag)
{
@@ -292,6 +291,7 @@ EXPORT_SYMBOL(dma_supported);
static int __init pci_iommu_init(void)
{
+ struct iommu_table_entry *p;
dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
#ifdef CONFIG_PCI
@@ -299,12 +299,10 @@ static int __init pci_iommu_init(void)
#endif
x86_init.iommu.iommu_init();
- if (swiotlb || xen_swiotlb) {
- printk(KERN_INFO "PCI-DMA: "
- "Using software bounce buffering for IO (SWIOTLB)\n");
- swiotlb_print_info();
- } else
- swiotlb_free();
+ for (p = __iommu_table; p < __iommu_table_end; p++) {
+ if (p && (p->flags & IOMMU_DETECTED) && p->late_init)
+ p->late_init();
+ }
return 0;
}
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c562207b1b3..ba0f0ca9f28 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -41,6 +41,7 @@
#include <asm/dma.h>
#include <asm/amd_nb.h>
#include <asm/x86_init.h>
+#include <asm/iommu_table.h>
static unsigned long iommu_bus_base; /* GART remapping area (physical) */
static unsigned long iommu_size; /* size of remapping area bytes */
@@ -905,3 +906,4 @@ void __init gart_parse_options(char *p)
}
}
}
+IOMMU_INIT_POST(gart_iommu_hole_init);
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
new file mode 100644
index 00000000000..55d745ec118
--- /dev/null
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -0,0 +1,89 @@
+#include <linux/dma-mapping.h>
+#include <asm/iommu_table.h>
+#include <linux/string.h>
+#include <linux/kallsyms.h>
+
+
+#define DEBUG 1
+
+static struct iommu_table_entry * __init
+find_dependents_of(struct iommu_table_entry *start,
+ struct iommu_table_entry *finish,
+ struct iommu_table_entry *q)
+{
+ struct iommu_table_entry *p;
+
+ if (!q)
+ return NULL;
+
+ for (p = start; p < finish; p++)
+ if (p->detect == q->depend)
+ return p;
+
+ return NULL;
+}
+
+
+void __init sort_iommu_table(struct iommu_table_entry *start,
+ struct iommu_table_entry *finish) {
+
+ struct iommu_table_entry *p, *q, tmp;
+
+ for (p = start; p < finish; p++) {
+again:
+ q = find_dependents_of(start, finish, p);
+ /* We are bit sneaky here. We use the memory address to figure
+ * out if the node we depend on is past our point, if so, swap.
+ */
+ if (q > p) {
+ tmp = *p;
+ memmove(p, q, sizeof(*p));
+ *q = tmp;
+ goto again;
+ }
+ }
+
+}
+
+#ifdef DEBUG
+void __init check_iommu_entries(struct iommu_table_entry *start,
+ struct iommu_table_entry *finish)
+{
+ struct iommu_table_entry *p, *q, *x;
+ char sym_p[KSYM_SYMBOL_LEN];
+ char sym_q[KSYM_SYMBOL_LEN];
+
+ /* Simple cyclic dependency checker. */
+ for (p = start; p < finish; p++) {
+ q = find_dependents_of(start, finish, p);
+ x = find_dependents_of(start, finish, q);
+ if (p == x) {
+ sprint_symbol(sym_p, (unsigned long)p->detect);
+ sprint_symbol(sym_q, (unsigned long)q->detect);
+
+ printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \
+ " on %s and vice-versa. BREAKING IT.\n",
+ sym_p, sym_q);
+ /* Heavy handed way..*/
+ x->depend = 0;
+ }
+ }
+
+ for (p = start; p < finish; p++) {
+ q = find_dependents_of(p, finish, p);
+ if (q && q > p) {
+ sprint_symbol(sym_p, (unsigned long)p->detect);
+ sprint_symbol(sym_q, (unsigned long)q->detect);
+
+ printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\
+ "should be called before %s!\n",
+ sym_p, sym_q);
+ }
+ }
+}
+#else
+inline void check_iommu_entries(struct iommu_table_entry *start,
+ struct iommu_table_entry *finish)
+{
+}
+#endif
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index a5bc528d432..8f972cbddef 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -10,7 +10,8 @@
#include <asm/iommu.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
-
+#include <asm/xen/swiotlb-xen.h>
+#include <asm/iommu_table.h>
int swiotlb __read_mostly;
static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -41,25 +42,42 @@ static struct dma_map_ops swiotlb_dma_ops = {
};
/*
- * pci_swiotlb_detect - set swiotlb to 1 if necessary
+ * pci_swiotlb_detect_override - set swiotlb to 1 if necessary
*
* This returns non-zero if we are forced to use swiotlb (by the boot
* option).
*/
-int __init pci_swiotlb_detect(void)
+int __init pci_swiotlb_detect_override(void)
{
int use_swiotlb = swiotlb | swiotlb_force;
+ if (swiotlb_force)
+ swiotlb = 1;
+
+ return use_swiotlb;
+}
+IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
+ pci_xen_swiotlb_detect,
+ pci_swiotlb_init,
+ pci_swiotlb_late_init);
+
+/*
+ * if 4GB or more detected (and iommu=off not set) return 1
+ * and set swiotlb to 1.
+ */
+int __init pci_swiotlb_detect_4gb(void)
+{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
#ifdef CONFIG_X86_64
if (!no_iommu && max_pfn > MAX_DMA32_PFN)
swiotlb = 1;
#endif
- if (swiotlb_force)
- swiotlb = 1;
-
- return use_swiotlb;
+ return swiotlb;
}
+IOMMU_INIT(pci_swiotlb_detect_4gb,
+ pci_swiotlb_detect_override,
+ pci_swiotlb_init,
+ pci_swiotlb_late_init);
void __init pci_swiotlb_init(void)
{
@@ -68,3 +86,15 @@ void __init pci_swiotlb_init(void)
dma_ops = &swiotlb_dma_ops;
}
}
+
+void __init pci_swiotlb_late_init(void)
+{
+ /* An IOMMU turned us off. */
+ if (!swiotlb)
+ swiotlb_free();
+ else {
+ printk(KERN_INFO "PCI-DMA: "
+ "Using software bounce buffering for IO (SWIOTLB)\n");
+ swiotlb_print_info();
+ }
+}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index d0bb52296fa..38e2b67807e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -242,6 +242,12 @@ SECTIONS
__x86_cpu_dev_end = .;
}
+ /*
+ * start address and size of operations which during runtime
+ * can be patched with virtualization friendly instructions or
+ * baremetal native ones. Think page table operations.
+ * Details in paravirt_types.h
+ */
. = ALIGN(8);
.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
__parainstructions = .;
@@ -249,6 +255,11 @@ SECTIONS
__parainstructions_end = .;
}
+ /*
+ * struct alt_inst entries. From the header (alternative.h):
+ * "Alternative instructions for different CPU types or capabilities"
+ * Think locking instructions on spinlocks.
+ */
. = ALIGN(8);
.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
__alt_instructions = .;
@@ -256,11 +267,28 @@ SECTIONS
__alt_instructions_end = .;
}
+ /*
+ * And here are the replacement instructions. The linker sticks
+ * them as binary blobs. The .altinstructions has enough data to
+ * get the address and the length of them to patch the kernel safely.
+ */
.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
*(.altinstr_replacement)
}
/*
+ * struct iommu_table_entry entries are injected in this section.
+ * It is an array of IOMMUs which during run time gets sorted depending
+ * on its dependency order. After rootfs_initcall is complete
+ * this section can be safely removed.
+ */
+ .iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) {
+ __iommu_table = .;
+ *(.iommu_table)
+ __iommu_table_end = .;
+ }
+ . = ALIGN(8);
+ /*
* .exit.text is discard at runtime, not link time, to deal with
* references from .altinstructions and .eh_frame
*/