From ad3ad3f6a2caebf56869b83b69e23eb9fa5e0ab6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:40 -0700 Subject: x64, x2apic/intr-remap: parse ioapic scope under vt-d structures Parse the vt-d device scope structures to find the mapping between IO-APICs and the interrupt remapping hardware units. This will be used later for enabling Interrupt-remapping for IOAPIC devices. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/intr_remapping.c | 70 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 drivers/pci/intr_remapping.c (limited to 'drivers/pci/intr_remapping.c') diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c new file mode 100644 index 00000000000..a80b87921c6 --- /dev/null +++ b/drivers/pci/intr_remapping.c @@ -0,0 +1,70 @@ +#include +#include +#include "intel-iommu.h" +#include "intr_remapping.h" + +static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; +static int ir_ioapic_num; + +static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, + struct intel_iommu *iommu) +{ + struct acpi_dmar_hardware_unit *drhd; + struct acpi_dmar_device_scope *scope; + void *start, *end; + + drhd = (struct acpi_dmar_hardware_unit *)header; + + start = (void *)(drhd + 1); + end = ((void *)drhd) + header->length; + + while (start < end) { + scope = start; + if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) { + if (ir_ioapic_num == MAX_IO_APICS) { + printk(KERN_WARNING "Exceeded Max IO APICS\n"); + return -1; + } + + printk(KERN_INFO "IOAPIC id %d under DRHD base" + " 0x%Lx\n", scope->enumeration_id, + drhd->address); + + ir_ioapic[ir_ioapic_num].iommu = iommu; + ir_ioapic[ir_ioapic_num].id = scope->enumeration_id; + ir_ioapic_num++; + } + start += scope->length; + } + + return 0; +} + +/* + * Finds the assocaition between IOAPIC's and its Interrupt-remapping + * hardware unit. + */ +int __init parse_ioapics_under_ir(void) +{ + struct dmar_drhd_unit *drhd; + int ir_supported = 0; + + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (ecap_ir_support(iommu->ecap)) { + if (ir_parse_ioapic_scope(drhd->hdr, iommu)) + return -1; + + ir_supported = 1; + } + } + + if (ir_supported && ir_ioapic_num != nr_ioapics) { + printk(KERN_WARNING + "Not all IO-APIC's listed under remapping hardware\n"); + return -1; + } + + return ir_supported; +} -- cgit v1.2.3-70-g09d2 From 2ae21010694e56461a63bfc80e960090ce0a5ed9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:43 -0700 Subject: x64, x2apic/intr-remap: Interrupt remapping infrastructure Interrupt remapping (part of Intel Virtualization Tech for directed I/O) infrastructure. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dma_remapping.h | 2 + drivers/pci/dmar.c | 16 +++++ drivers/pci/intel-iommu.c | 21 +++---- drivers/pci/intel-iommu.h | 24 +++++++- drivers/pci/intr_remapping.c | 137 +++++++++++++++++++++++++++++++++++++++++++ drivers/pci/intr_remapping.h | 2 + include/linux/dmar.h | 120 ++++++++++++++++++++++++++----------- 7 files changed, 272 insertions(+), 50 deletions(-) (limited to 'drivers/pci/intr_remapping.c') diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h index 05aac8ef96c..bff5c65f81d 100644 --- a/drivers/pci/dma_remapping.h +++ b/drivers/pci/dma_remapping.h @@ -145,6 +145,8 @@ struct device_domain_info { extern int init_dmars(void); extern void free_dmar_iommu(struct intel_iommu *iommu); +extern int dmar_disabled; + #ifndef CONFIG_DMAR_GFX_WA static inline void iommu_prepare_gfx_mapping(void) { diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index aba151ca6d2..23a119e6485 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -449,6 +449,22 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } +void __init detect_intel_iommu(void) +{ + int ret; + + ret = early_dmar_detect(); + +#ifdef CONFIG_DMAR + { + if (ret && !no_iommu && !iommu_detected && !swiotlb && + !dmar_disabled) + iommu_detected = 1; + } +#endif +} + + int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 347bf2e4716..ffccf2341b9 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -76,7 +76,7 @@ static long list_size; static void domain_remove_dev_info(struct dmar_domain *domain); -static int dmar_disabled; +int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; @@ -2238,15 +2238,6 @@ static void __init iommu_exit_mempool(void) } -void __init detect_intel_iommu(void) -{ - if (swiotlb || no_iommu || iommu_detected || dmar_disabled) - return; - if (early_dmar_detect()) { - iommu_detected = 1; - } -} - static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; @@ -2293,15 +2284,19 @@ int __init intel_iommu_init(void) { int ret = 0; - if (no_iommu || swiotlb || dmar_disabled) - return -ENODEV; - if (dmar_table_init()) return -ENODEV; if (dmar_dev_scope_init()) return -ENODEV; + /* + * Check the need for DMA-remapping initialization now. + * Above initialization will also be used by Interrupt-remapping. + */ + if (no_iommu || swiotlb || dmar_disabled) + return -ENODEV; + iommu_init_mempool(); dmar_init_reserved_ranges(); diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 2983ce89535..a81a74e2bd9 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -56,6 +56,7 @@ #define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ #define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define OFFSET_STRIDE (9) /* @@ -157,16 +158,20 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GCMD_SRTP (((u32)1) << 30) #define DMA_GCMD_SFL (((u32)1) << 29) #define DMA_GCMD_EAFL (((u32)1) << 28) -#define DMA_GCMD_QIE (((u32)1) << 26) #define DMA_GCMD_WBF (((u32)1) << 27) +#define DMA_GCMD_QIE (((u32)1) << 26) +#define DMA_GCMD_SIRTP (((u32)1) << 24) +#define DMA_GCMD_IRE (((u32) 1) << 25) /* GSTS_REG */ #define DMA_GSTS_TES (((u32)1) << 31) #define DMA_GSTS_RTPS (((u32)1) << 30) #define DMA_GSTS_FLS (((u32)1) << 29) #define DMA_GSTS_AFLS (((u32)1) << 28) -#define DMA_GSTS_QIES (((u32)1) << 26) #define DMA_GSTS_WBFS (((u32)1) << 27) +#define DMA_GSTS_QIES (((u32)1) << 26) +#define DMA_GSTS_IRTPS (((u32)1) << 24) +#define DMA_GSTS_IRES (((u32)1) << 25) /* CCMD_REG */ #define DMA_CCMD_ICC (((u64)1) << 63) @@ -245,6 +250,16 @@ struct q_inval { int free_cnt; }; +#ifdef CONFIG_INTR_REMAP +/* 1MB - maximum possible interrupt remapping table size */ +#define INTR_REMAP_PAGE_ORDER 8 +#define INTR_REMAP_TABLE_REG_SIZE 0xf + +struct ir_table { + struct irte *base; +}; +#endif + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; @@ -266,6 +281,9 @@ struct intel_iommu { struct sys_device sysdev; #endif struct q_inval *qi; /* Queued invalidation info */ +#ifdef CONFIG_INTR_REMAP + struct ir_table *ir_table; /* Interrupt remapping info */ +#endif }; static inline void __iommu_flush_cache( @@ -279,5 +297,7 @@ extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); +extern int dmar_enable_qi(struct intel_iommu *iommu); +extern void qi_global_iec(struct intel_iommu *iommu); #endif diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index a80b87921c6..3d10cdc9031 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -1,10 +1,147 @@ #include +#include +#include +#include #include #include "intel-iommu.h" #include "intr_remapping.h" static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; +int intr_remapping_enabled; + +static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) +{ + u64 addr; + u32 cmd, sts; + unsigned long flags; + + addr = virt_to_phys((void *)iommu->ir_table->base); + + spin_lock_irqsave(&iommu->register_lock, flags); + + dmar_writeq(iommu->reg + DMAR_IRTA_REG, + (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); + + /* Set interrupt-remapping table pointer */ + cmd = iommu->gcmd | DMA_GCMD_SIRTP; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_IRTPS), sts); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + /* + * global invalidation of interrupt entry cache before enabling + * interrupt-remapping. + */ + qi_global_iec(iommu); + + spin_lock_irqsave(&iommu->register_lock, flags); + + /* Enable interrupt-remapping */ + cmd = iommu->gcmd | DMA_GCMD_IRE; + iommu->gcmd |= DMA_GCMD_IRE; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_IRES), sts); + + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + + +static int setup_intr_remapping(struct intel_iommu *iommu, int mode) +{ + struct ir_table *ir_table; + struct page *pages; + + ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table), + GFP_KERNEL); + + if (!iommu->ir_table) + return -ENOMEM; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); + + if (!pages) { + printk(KERN_ERR "failed to allocate pages of order %d\n", + INTR_REMAP_PAGE_ORDER); + kfree(iommu->ir_table); + return -ENOMEM; + } + + ir_table->base = page_address(pages); + + iommu_set_intr_remapping(iommu, mode); + return 0; +} + +int __init enable_intr_remapping(int eim) +{ + struct dmar_drhd_unit *drhd; + int setup = 0; + + /* + * check for the Interrupt-remapping support + */ + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + continue; + + if (eim && !ecap_eim_support(iommu->ecap)) { + printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " + " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); + return -1; + } + } + + /* + * Enable queued invalidation for all the DRHD's. + */ + for_each_drhd_unit(drhd) { + int ret; + struct intel_iommu *iommu = drhd->iommu; + ret = dmar_enable_qi(iommu); + + if (ret) { + printk(KERN_ERR "DRHD %Lx: failed to enable queued, " + " invalidation, ecap %Lx, ret %d\n", + drhd->reg_base_addr, iommu->ecap, ret); + return -1; + } + } + + /* + * Setup Interrupt-remapping for all the DRHD's now. + */ + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + continue; + + if (setup_intr_remapping(iommu, eim)) + goto error; + + setup = 1; + } + + if (!setup) + goto error; + + intr_remapping_enabled = 1; + + return 0; + +error: + /* + * handle error condition gracefully here! + */ + return -1; +} static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, struct intel_iommu *iommu) diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h index c4a40b2f33f..05f2635bbe4 100644 --- a/drivers/pci/intr_remapping.h +++ b/drivers/pci/intr_remapping.h @@ -4,3 +4,5 @@ struct ioapic_scope { struct intel_iommu *iommu; unsigned int id; }; + +#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index c4e96eb2961..8a0238dd2c1 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -25,9 +25,85 @@ #include #include -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; +struct dmar_drhd_unit { + struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + u64 reg_base_addr; /* register base address*/ + struct pci_dev **devices; /* target device array */ + int devices_cnt; /* target device count */ + u8 ignored:1; /* ignore drhd */ + u8 include_all:1; + struct intel_iommu *iommu; +}; + +extern struct list_head dmar_drhd_units; + +#define for_each_drhd_unit(drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) + +extern int dmar_table_init(void); +extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); + +/* Intel IOMMU detection */ +extern void detect_intel_iommu(void); + + +extern int parse_ioapics_under_ir(void); +extern int alloc_iommu(struct dmar_drhd_unit *); +#else +static inline void detect_intel_iommu(void) +{ + return; +} + +static inline int dmar_table_init(void) +{ + return -ENODEV; +} +#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ + +#ifdef CONFIG_INTR_REMAP +extern int intr_remapping_enabled; +extern int enable_intr_remapping(int); + +struct irte { + union { + struct { + __u64 present : 1, + fpd : 1, + dst_mode : 1, + redir_hint : 1, + trigger_mode : 1, + dlvry_mode : 3, + avail : 4, + __reserved_1 : 4, + vector : 8, + __reserved_2 : 8, + dest_id : 32; + }; + __u64 low; + }; + + union { + struct { + __u64 sid : 16, + sq : 2, + svt : 2, + __reserved_3 : 44; + }; + __u64 high; + }; +}; +#else +#define enable_intr_remapping(mode) (-1) +#define intr_remapping_enabled (0) +#endif + +#ifdef CONFIG_DMAR extern const char *dmar_get_fault_reason(u8 fault_reason); /* Can't use the common MSI interrupt functions @@ -40,29 +116,8 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern int arch_setup_dmar_msi(unsigned int irq); -/* Intel IOMMU detection and initialization functions */ -extern void detect_intel_iommu(void); -extern int intel_iommu_init(void); - -extern int dmar_table_init(void); -extern int early_dmar_detect(void); -extern int dmar_dev_scope_init(void); -extern int parse_ioapics_under_ir(void); - -extern struct list_head dmar_drhd_units; +extern int iommu_detected, no_iommu; extern struct list_head dmar_rmrr_units; - -struct dmar_drhd_unit { - struct list_head list; /* list of drhd units */ - struct acpi_dmar_header *hdr; /* ACPI header */ - u64 reg_base_addr; /* register base address*/ - struct pci_dev **devices; /* target device array */ - int devices_cnt; /* target device count */ - u8 ignored:1; /* ignore drhd */ - u8 include_all:1; - struct intel_iommu *iommu; -}; - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -72,24 +127,19 @@ struct dmar_rmrr_unit { int devices_cnt; /* target device count */ }; -#define for_each_drhd_unit(drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) - -extern int alloc_iommu(struct dmar_drhd_unit *); +/* Intel DMAR initialization functions */ +extern int intel_iommu_init(void); +extern int dmar_disabled; #else -static inline void detect_intel_iommu(void) -{ - return; -} static inline int intel_iommu_init(void) { +#ifdef CONFIG_INTR_REMAP + return dmar_dev_scope_init(); +#else return -ENODEV; -} -static inline int dmar_table_init(void) -{ - return -ENODEV; +#endif } #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- cgit v1.2.3-70-g09d2 From b6fcb33ad6c05f152a672f7c96c1fab006527b80 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:44 -0700 Subject: x64, x2apic/intr-remap: routines managing Interrupt remapping table entries. Routines handling the management of interrupt remapping table entries. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/intel-iommu.h | 4 + drivers/pci/intr_remapping.c | 243 +++++++++++++++++++++++++++++++++++++++++++ include/linux/dmar.h | 12 +++ 3 files changed, 259 insertions(+) (limited to 'drivers/pci/intr_remapping.c') diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index a81a74e2bd9..2142c01e014 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -123,6 +123,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_qis(e) ((e) & 0x2) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_max_handle_mask(e) ((e >> 20) & 0xf) /* IOTLB_REG */ @@ -255,6 +256,8 @@ struct q_inval { #define INTR_REMAP_PAGE_ORDER 8 #define INTR_REMAP_TABLE_REG_SIZE 0xf +#define INTR_REMAP_TABLE_ENTRIES 65536 + struct ir_table { struct irte *base; }; @@ -300,4 +303,5 @@ extern void free_iommu(struct intel_iommu *iommu); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); +extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); #endif diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 3d10cdc9031..bddb4b19b6c 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "intel-iommu.h" #include "intr_remapping.h" @@ -10,6 +11,248 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; int intr_remapping_enabled; +static struct { + struct intel_iommu *iommu; + u16 irte_index; + u16 sub_handle; + u8 irte_mask; +} irq_2_iommu[NR_IRQS]; + +static DEFINE_SPINLOCK(irq_2_ir_lock); + +int irq_remapped(int irq) +{ + if (irq > NR_IRQS) + return 0; + + if (!irq_2_iommu[irq].iommu) + return 0; + + return 1; +} + +int get_irte(int irq, struct irte *entry) +{ + int index; + + if (!entry || irq > NR_IRQS) + return -1; + + spin_lock(&irq_2_ir_lock); + if (!irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index); + + spin_unlock(&irq_2_ir_lock); + return 0; +} + +int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) +{ + struct ir_table *table = iommu->ir_table; + u16 index, start_index; + unsigned int mask = 0; + int i; + + if (!count) + return -1; + + /* + * start the IRTE search from index 0. + */ + index = start_index = 0; + + if (count > 1) { + count = __roundup_pow_of_two(count); + mask = ilog2(count); + } + + if (mask > ecap_max_handle_mask(iommu->ecap)) { + printk(KERN_ERR + "Requested mask %x exceeds the max invalidation handle" + " mask value %Lx\n", mask, + ecap_max_handle_mask(iommu->ecap)); + return -1; + } + + spin_lock(&irq_2_ir_lock); + do { + for (i = index; i < index + count; i++) + if (table->base[i].present) + break; + /* empty index found */ + if (i == index + count) + break; + + index = (index + count) % INTR_REMAP_TABLE_ENTRIES; + + if (index == start_index) { + spin_unlock(&irq_2_ir_lock); + printk(KERN_ERR "can't allocate an IRTE\n"); + return -1; + } + } while (1); + + for (i = index; i < index + count; i++) + table->base[i].present = 1; + + irq_2_iommu[irq].iommu = iommu; + irq_2_iommu[irq].irte_index = index; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = mask; + + spin_unlock(&irq_2_ir_lock); + + return index; +} + +static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask) +{ + struct qi_desc desc; + + desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) + | QI_IEC_SELECTIVE; + desc.high = 0; + + qi_submit_sync(&desc, iommu); +} + +int map_irq_to_irte_handle(int irq, u16 *sub_handle) +{ + int index; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + *sub_handle = irq_2_iommu[irq].sub_handle; + index = irq_2_iommu[irq].irte_index; + spin_unlock(&irq_2_ir_lock); + return index; +} + +int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) +{ + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + irq_2_iommu[irq].iommu = iommu; + irq_2_iommu[irq].irte_index = index; + irq_2_iommu[irq].sub_handle = subhandle; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) +{ + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + irq_2_iommu[irq].iommu = NULL; + irq_2_iommu[irq].irte_index = 0; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int modify_irte(int irq, struct irte *irte_modified) +{ + int index; + struct irte *irte; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + irte = &iommu->ir_table->base[index]; + + set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); + __iommu_flush_cache(iommu, irte, sizeof(*irte)); + + qi_flush_iec(iommu, index, 0); + + spin_unlock(&irq_2_ir_lock); + return 0; +} + +int flush_irte(int irq) +{ + int index; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + + qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int free_irte(int irq) +{ + int index, i; + struct irte *irte; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + irte = &iommu->ir_table->base[index]; + + if (!irq_2_iommu[irq].sub_handle) { + for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++) + set_64bit((unsigned long *)irte, 0); + qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); + } + + irq_2_iommu[irq].iommu = NULL; + irq_2_iommu[irq].irte_index = 0; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) { u64 addr; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8a0238dd2c1..324bbca85a2 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -98,7 +98,19 @@ struct irte { __u64 high; }; }; +extern int get_irte(int irq, struct irte *entry); +extern int modify_irte(int irq, struct irte *irte_modified); +extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); +extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, + u16 sub_handle); +extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); +extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index); +extern int flush_irte(int irq); +extern int free_irte(int irq); + +extern int irq_remapped(int irq); #else +#define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define intr_remapping_enabled (0) #endif -- cgit v1.2.3-70-g09d2 From 89027d35aa5b8f45ce0f7fa0911db85b46563da0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:56 -0700 Subject: x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping IO-APIC support in the presence of interrupt-remapping infrastructure. IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, trigger mode etc, which traditionally was present in the IO-APIC RTE. Introduce a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this cleanly). For edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flush the hardware cache. For level triggered, we need to modify the io-apic RTE aswell with the update vector information, along with modifying IRTE with vector and cpu destination. So irq migration for level triggered is little bit more complex compared to edge triggered migration. But the good news is, we use the same algorithm for level triggered migration as we have today, only difference being, we now initiate the irq migration from process context instead of the interrupt context. In future, when we do a directed EOI (combined with cpu EOI broadcast suppression) to the IO-APIC, level triggered irq migration will also be as simple as edge triggered migration and we can do the irq migration with a simple atomic update to IO-APIC RTE. TBD: some tests/changes needed in the presence of fixup_irqs() for level triggered irq migration. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 1 + arch/x86/kernel/io_apic_64.c | 300 +++++++++++++++++++++++++++++++++++++--- drivers/pci/intr_remapping.c | 10 ++ include/asm-x86/apic.h | 9 ++ include/asm-x86/io_apic.h | 14 ++ include/asm-x86/irq_remapping.h | 8 ++ include/linux/dmar.h | 1 + 7 files changed, 321 insertions(+), 22 deletions(-) create mode 100644 include/asm-x86/irq_remapping.h (limited to 'drivers/pci/intr_remapping.c') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index a969ef78e12..d5c06917b5b 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -46,6 +46,7 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int x2apic; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b62d42ef928..9bd02ef049a 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -37,6 +37,7 @@ #include #endif #include +#include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include @@ -312,7 +314,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) pin = entry->pin; if (pin == -1) break; - io_apic_write(apic, 0x11 + pin*2, dest); + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -906,18 +913,98 @@ void setup_vector_irq(int cpu) static struct irq_chip ioapic_chip; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip; +#endif static void ioapic_register_intr(int irq, unsigned long trigger) { - if (trigger) { + if (trigger) irq_desc[irq].status |= IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); - } else { + else irq_desc[irq].status &= ~IRQ_LEVEL; + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + irq_desc[irq].status |= IRQ_MOVE_PCNTXT; + if (trigger) + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_edge_irq, "edge"); + return; + } +#endif + if (trigger) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); +} + +static int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) +{ + /* + * add it to the IO-APIC irq-routing table: + */ + memset(entry,0,sizeof(*entry)); + +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct irte irte; + struct IR_IO_APIC_route_entry *ir_entry = + (struct IR_IO_APIC_route_entry *) entry; + int index; + + if (!iommu) + panic("No mapping iommu for ioapic %d\n", apic); + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + panic("Failed to allocate IRTE for ioapic %d\n", apic); + + memset(&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = trigger; + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = vector; + irte.dest_id = IRTE_DEST(destination); + + modify_irte(irq, &irte); + + ir_entry->index2 = (index >> 15) & 0x1; + ir_entry->zero = 0; + ir_entry->format = 1; + ir_entry->index = (index & 0x7fff); + } else +#endif + { + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest = destination; } + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; + entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry->mask = 1; + return 0; } static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, @@ -942,24 +1029,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest = cpu_mask_to_apicid(mask); - entry.mask = 0; /* enable IRQ */ - entry.trigger = trigger; - entry.polarity = polarity; - entry.vector = cfg->vector; - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry.mask = 1; + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); + __clear_irq_vector(irq); + return; + } ioapic_register_intr(irq, trigger); if (irq < 16) @@ -1011,6 +1089,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; + if (intr_remapping_enabled) + return; + memset(&entry, 0, sizeof(entry)); /* @@ -1466,6 +1547,147 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP + +#ifdef CONFIG_INTR_REMAP +static void ir_irq_migration(struct work_struct *work); + +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); + +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For edge triggered, irq migration is a simple atomic update(of vector + * and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we need to modify the io-apic RTE aswell with the update + * vector information, along with modifying IRTE with vector and destination. + * So irq migration for level triggered is little bit more complex compared to + * edge triggered migration. But the good news is, we use the same algorithm + * for level triggered migration as we have today, only difference being, + * we now initiate the irq migration from process context instead of the + * interrupt context. + * + * In future, when we do a directed EOI (combined with cpu EOI broadcast + * suppression) to the IO-APIC, level triggered irq migration will also be + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. + */ +static void migrate_ioapic_irq(int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct irq_desc *desc = irq_desc + irq; + cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte = desc->status & IRQ_LEVEL; + unsigned int dest; + unsigned long flags; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Modified the IRTE and flushes the Interrupt entry cache. + */ + modify_irte(irq, &irte); + + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} + +static int migrate_irq_remapped_level(int irq) +{ + int ret = -1; + + mask_IO_APIC_irq(irq); + + if (io_apic_level_ack_pending(irq)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse + * the EOI broadcast performed by cpu. + * So, delay the irq migration to the next instance. + */ + schedule_delayed_work(&ir_migration_work, 1); + goto unmask; + } + + /* everthing is clear. we have right of way */ + migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); + + ret = 0; + irq_desc[irq].status &= ~IRQ_MOVE_PENDING; + cpus_clear(irq_desc[irq].pending_mask); + +unmask: + unmask_IO_APIC_irq(irq); + return ret; +} + +static void ir_irq_migration(struct work_struct *work) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + struct irq_desc *desc = irq_desc + irq; + if (desc->status & IRQ_MOVE_PENDING) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->chip->set_affinity || + !(desc->status & IRQ_MOVE_PENDING)) { + desc->status &= ~IRQ_MOVE_PENDING; + spin_unlock_irqrestore(&desc->lock, flags); + continue; + } + + desc->chip->set_affinity(irq, + irq_desc[irq].pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* + * Migrates the IRQ destination in the process context. + */ +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + if (irq_desc[irq].status & IRQ_LEVEL) { + irq_desc[irq].status |= IRQ_MOVE_PENDING; + irq_desc[irq].pending_mask = mask; + migrate_irq_remapped_level(irq); + return; + } + + migrate_ioapic_irq(irq, mask); +} +#endif + asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -1522,6 +1744,17 @@ static void irq_complete_move(unsigned int irq) #else static inline void irq_complete_move(unsigned int irq) {} #endif +#ifdef CONFIG_INTR_REMAP +static void ack_x2apic_level(unsigned int irq) +{ + ack_x2APIC_irq(); +} + +static void ack_x2apic_edge(unsigned int irq) +{ + ack_x2APIC_irq(); +} +#endif static void ack_apic_edge(unsigned int irq) { @@ -1596,6 +1829,21 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip __read_mostly = { + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; +#endif + static inline void init_IO_APIC_traps(void) { int irq; @@ -1783,6 +2031,8 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { + if (intr_remapping_enabled) + panic("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -1809,6 +2059,8 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " @@ -2401,6 +2653,10 @@ void __init setup_ioapic_dest(void) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); +#ifdef CONFIG_INTR_REMAP + else if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif else set_ioapic_affinity_irq(irq, TARGET_CPUS); } diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index bddb4b19b6c..32e55c7a980 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -220,6 +220,16 @@ int flush_irte(int irq) return 0; } +struct intel_iommu *map_ioapic_to_ir(int apic) +{ + int i; + + for (i = 0; i < MAX_IO_APICS; i++) + if (ir_ioapic[i].id == apic) + return ir_ioapic[i].iommu; + return NULL; +} + int free_irte(int irq) { int index, i; diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index bb54928373c..aa746704a5c 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -134,6 +134,15 @@ extern int get_physical_broadcast(void); # define apic_write_around(x, y) apic_write_atomic((x), (y)) #endif +#ifdef CONFIG_X86_64 +static inline void ack_x2APIC_irq(void) +{ + /* Docs say use 0 for future compatibility */ + native_apic_msr_write(APIC_EOI, 0); +} +#endif + + static inline void ack_APIC_irq(void) { /* diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 1c4a99d882f..8dc2622714c 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -107,6 +107,20 @@ struct IO_APIC_route_entry { } __attribute__ ((packed)); +struct IR_IO_APIC_route_entry { + __u64 vector : 8, + zero : 3, + index2 : 1, + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, + mask : 1, + reserved : 31, + format : 1, + index : 15; +} __attribute__ ((packed)); + #ifdef CONFIG_X86_IO_APIC /* diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h new file mode 100644 index 00000000000..78242c6ffa5 --- /dev/null +++ b/include/asm-x86/irq_remapping.h @@ -0,0 +1,8 @@ +#ifndef _ASM_IRQ_REMAPPING_H +#define _ASM_IRQ_REMAPPING_H + +extern int x2apic; + +#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) + +#endif diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 324bbca85a2..bf41ffa7470 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) -- cgit v1.2.3-70-g09d2 From 75c46fa61bc5b4ccd20a168ff325c58771248fcd Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:57 -0700 Subject: x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure MSI and MSI-X support for interrupt remapping infrastructure. MSI address register will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, etc. For MSI-X, all the IRTE's will be consecutively allocated in the table, and the address registers will contain the starting index to the block and the data register will contain the subindex with in that block. This also introduces a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this). As MSI is edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flushing the hardware cache. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 230 +++++++++++++++++++++++++++++++++++++++++-- drivers/pci/intr_remapping.c | 11 +++ include/asm-x86/msidef.h | 4 + include/linux/dmar.h | 1 + 4 files changed, 238 insertions(+), 8 deletions(-) (limited to 'drivers/pci/intr_remapping.c') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9bd02ef049a..877aa2e9d7e 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -2297,6 +2297,9 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); +#ifdef CONFIG_INTR_REMAP + free_irte(irq); +#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); @@ -2315,10 +2318,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms tmp = TARGET_CPUS; err = assign_irq_vector(irq, tmp); - if (!err) { - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + if (err) + return err; + + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; + u16 sub_handle; + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = 0; /* edge */ + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + modify_irte(irq, &irte); + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); + } else +#endif + { msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | @@ -2369,6 +2403,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) write_msi_msg(irq, &msg); irq_desc[irq].affinity = mask; } + +#ifdef CONFIG_INTR_REMAP +/* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned int dest; + cpumask_t tmp, cleanup_mask; + struct irte irte; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * atomically update the IRTE with the new destination and vector. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} +#endif #endif /* CONFIG_SMP */ /* @@ -2386,26 +2469,157 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +#ifdef CONFIG_INTR_REMAP +static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_x2apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) +{ + struct intel_iommu *iommu; + int index; + + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + return -ENOENT; + } + + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", nvec, + pci_name(dev)); + return -ENOSPC; + } + return index; +} +#endif + +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) { + int ret; struct msi_msg msg; + + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) + return ret; + + set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_desc + irq; + /* + * irq migration in process context + */ + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + + return 0; +} + +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +{ int irq, ret; + irq = create_irq(); if (irq < 0) return irq; - ret = msi_compose_msg(dev, irq, &msg); +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + ret = msi_alloc_irte(dev, irq, 1); + if (ret < 0) + goto error; +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); if (ret < 0) { destroy_irq(irq); return ret; } + return 0; - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); +#ifdef CONFIG_INTR_REMAP +error: + destroy_irq(irq); + return ret; +#endif +} - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, sub_handle; + struct msi_desc *desc; +#ifdef CONFIG_INTR_REMAP + struct intel_iommu *iommu = 0; + int index = 0; +#endif + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq(); + if (irq < 0) + return irq; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + if (!sub_handle) { + /* + * allocate the consecutive block of IRTE's + * for 'nvec' + */ + index = msi_alloc_irte(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + iommu = map_dev_to_ir(dev); + if (!iommu) { + ret = -ENOENT; + goto error; + } + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + } +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } return 0; + +error: + destroy_irq(irq); + return ret; } void arch_teardown_msi_irq(unsigned int irq) diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 32e55c7a980..bb642cc5e18 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -230,6 +230,17 @@ struct intel_iommu *map_ioapic_to_ir(int apic) return NULL; } +struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) +{ + struct dmar_drhd_unit *drhd; + + drhd = dmar_find_matched_drhd_unit(dev); + if (!drhd) + return NULL; + + return drhd->iommu; +} + int free_irte(int irq) { int index, i; diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h index 296f29ce426..57fd85935e5 100644 --- a/include/asm-x86/msidef.h +++ b/include/asm-x86/msidef.h @@ -48,4 +48,8 @@ #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ MSI_ADDR_DEST_ID_MASK) +#define MSI_ADDR_IR_EXT_INT (1 << 4) +#define MSI_ADDR_IR_SHV (1 << 3) +#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) +#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) #endif /* ASM_MSIDEF_H */ diff --git a/include/linux/dmar.h b/include/linux/dmar.h index bf41ffa7470..c360c558e59 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) -- cgit v1.2.3-70-g09d2 From 387179464257921eb9aa3d15cc3ff194f6945a7c Mon Sep 17 00:00:00 2001 From: "Kay, Allen M" Date: Tue, 9 Sep 2008 18:37:29 +0300 Subject: VT-d: Changes to support KVM This patch extends the VT-d driver to support KVM [Ben: fixed memory pinning] [avi: move dma_remapping.h as well] Signed-off-by: Kay, Allen M Signed-off-by: Weidong Han Signed-off-by: Ben-Ami Yassour Signed-off-by: Amit Shah Acked-by: Mark Gross Signed-off-by: Avi Kivity --- drivers/pci/dma_remapping.h | 157 -------------------- drivers/pci/dmar.c | 4 +- drivers/pci/intel-iommu.c | 116 ++++++++++++++- drivers/pci/intel-iommu.h | 307 --------------------------------------- drivers/pci/intr_remapping.c | 2 +- drivers/pci/intr_remapping.h | 2 +- drivers/pci/iova.c | 2 +- drivers/pci/iova.h | 52 ------- include/linux/dma_remapping.h | 157 ++++++++++++++++++++ include/linux/intel-iommu.h | 327 ++++++++++++++++++++++++++++++++++++++++++ include/linux/iova.h | 52 +++++++ 11 files changed, 653 insertions(+), 525 deletions(-) delete mode 100644 drivers/pci/dma_remapping.h delete mode 100644 drivers/pci/intel-iommu.h delete mode 100644 drivers/pci/iova.h create mode 100644 include/linux/dma_remapping.h create mode 100644 include/linux/intel-iommu.h create mode 100644 include/linux/iova.h (limited to 'drivers/pci/intr_remapping.c') diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h deleted file mode 100644 index bff5c65f81d..00000000000 --- a/drivers/pci/dma_remapping.h +++ /dev/null @@ -1,157 +0,0 @@ -#ifndef _DMA_REMAPPING_H -#define _DMA_REMAPPING_H - -/* - * We need a fixed PAGE_SIZE of 4K irrespective of - * arch PAGE_SIZE for IOMMU page tables. - */ -#define PAGE_SHIFT_4K (12) -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) - -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) -#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) -#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) - - -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 - (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 val; - u64 rsvd1; -}; -#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) -static inline bool root_present(struct root_entry *root) -{ - return (root->val & 1); -} -static inline void set_root_present(struct root_entry *root) -{ - root->val |= 1; -} -static inline void set_root_value(struct root_entry *root, unsigned long value) -{ - root->val |= value & PAGE_MASK_4K; -} - -struct context_entry; -static inline struct context_entry * -get_context_addr_from_root(struct root_entry *root) -{ - return (struct context_entry *) - (root_present(root)?phys_to_virt( - root->val & PAGE_MASK_4K): - NULL); -} - -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; -#define context_present(c) ((c).lo & 1) -#define context_fault_disable(c) (((c).lo >> 1) & 1) -#define context_translation_type(c) (((c).lo >> 2) & 3) -#define context_address_root(c) ((c).lo & PAGE_MASK_4K) -#define context_address_width(c) ((c).hi & 7) -#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) - -#define context_set_present(c) do {(c).lo |= 1;} while (0) -#define context_set_fault_enable(c) \ - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) -#define context_set_translation_type(c, val) \ - do { \ - (c).lo &= (((u64)-1) << 4) | 3; \ - (c).lo |= ((val) & 3) << 2; \ - } while (0) -#define CONTEXT_TT_MULTI_LEVEL 0 -#define context_set_address_root(c, val) \ - do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) -#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) -#define context_set_domain_id(c, val) \ - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) -#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) - -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-11: available - * 12-63: Host physcial address - */ -struct dma_pte { - u64 val; -}; -#define dma_clear_pte(p) do {(p).val = 0;} while (0) - -#define DMA_PTE_READ (1) -#define DMA_PTE_WRITE (2) - -#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) -#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) -#define dma_set_pte_prot(p, prot) \ - do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) -#define dma_set_pte_addr(p, addr) do {\ - (p).val |= ((addr) & PAGE_MASK_4K); } while (0) -#define dma_pte_present(p) (((p).val & 3) != 0) - -struct intel_iommu; - -struct dmar_domain { - int id; /* domain id */ - struct intel_iommu *iommu; /* back pointer to owning iommu */ - - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - spinlock_t mapping_lock; /* page table lock */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - -#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 - int flags; -}; - -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u8 bus; /* PCI bus numer */ - u8 devfn; /* PCI devfn number */ - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ - struct dmar_domain *domain; /* pointer to domain */ -}; - -extern int init_dmars(void); -extern void free_dmar_iommu(struct intel_iommu *iommu); - -extern int dmar_disabled; - -#ifndef CONFIG_DMAR_GFX_WA -static inline void iommu_prepare_gfx_mapping(void) -{ - return; -} -#endif /* !CONFIG_DMAR_GFX_WA */ - -#endif diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index bd2c01674f5..e842e756308 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -28,9 +28,9 @@ #include #include +#include +#include #include -#include "iova.h" -#include "intel-iommu.h" #undef PREFIX #define PREFIX "DMAR:" diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 389fdd6f4a9..fc5f2dbf532 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -33,8 +33,8 @@ #include #include #include -#include "iova.h" -#include "intel-iommu.h" +#include +#include #include /* force_iommu in this header in x86-64*/ #include #include @@ -156,7 +156,7 @@ static inline void *alloc_domain_mem(void) return iommu_kmem_cache_alloc(iommu_domain_cache); } -static inline void free_domain_mem(void *vaddr) +static void free_domain_mem(void *vaddr) { kmem_cache_free(iommu_domain_cache, vaddr); } @@ -1341,7 +1341,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain) * find_domain * Note: we use struct pci_dev->dev.archdata.iommu stores the info */ -struct dmar_domain * +static struct dmar_domain * find_domain(struct pci_dev *pdev) { struct device_domain_info *info; @@ -2318,3 +2318,111 @@ int __init intel_iommu_init(void) return 0; } +void intel_iommu_domain_exit(struct dmar_domain *domain) +{ + u64 end; + + /* Domain 0 is reserved, so dont process it */ + if (!domain) + return; + + end = DOMAIN_MAX_ADDR(domain->gaw); + end = end & (~PAGE_MASK_4K); + + /* clear ptes */ + dma_pte_clear_range(domain, 0, end); + + /* free page tables */ + dma_pte_free_pagetable(domain, 0, end); + + iommu_free_domain(domain); + free_domain_mem(domain); +} +EXPORT_SYMBOL_GPL(intel_iommu_domain_exit); + +struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev) +{ + struct dmar_drhd_unit *drhd; + struct dmar_domain *domain; + struct intel_iommu *iommu; + + drhd = dmar_find_matched_drhd_unit(pdev); + if (!drhd) { + printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n"); + return NULL; + } + + iommu = drhd->iommu; + if (!iommu) { + printk(KERN_ERR + "intel_iommu_domain_alloc: iommu == NULL\n"); + return NULL; + } + domain = iommu_alloc_domain(iommu); + if (!domain) { + printk(KERN_ERR + "intel_iommu_domain_alloc: domain == NULL\n"); + return NULL; + } + if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + printk(KERN_ERR + "intel_iommu_domain_alloc: domain_init() failed\n"); + intel_iommu_domain_exit(domain); + return NULL; + } + return domain; +} +EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc); + +int intel_iommu_context_mapping( + struct dmar_domain *domain, struct pci_dev *pdev) +{ + int rc; + rc = domain_context_mapping(domain, pdev); + return rc; +} +EXPORT_SYMBOL_GPL(intel_iommu_context_mapping); + +int intel_iommu_page_mapping( + struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot) +{ + int rc; + rc = domain_page_mapping(domain, iova, hpa, size, prot); + return rc; +} +EXPORT_SYMBOL_GPL(intel_iommu_page_mapping); + +void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn) +{ + detach_domain_for_dev(domain, bus, devfn); +} +EXPORT_SYMBOL_GPL(intel_iommu_detach_dev); + +struct dmar_domain * +intel_iommu_find_domain(struct pci_dev *pdev) +{ + return find_domain(pdev); +} +EXPORT_SYMBOL_GPL(intel_iommu_find_domain); + +int intel_iommu_found(void) +{ + return g_num_of_iommus; +} +EXPORT_SYMBOL_GPL(intel_iommu_found); + +u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova) +{ + struct dma_pte *pte; + u64 pfn; + + pfn = 0; + pte = addr_to_dma_pte(domain, iova); + + if (pte) + pfn = dma_pte_addr(*pte); + + return pfn >> PAGE_SHIFT_4K; +} +EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn); diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h deleted file mode 100644 index 2142c01e014..00000000000 --- a/drivers/pci/intel-iommu.h +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright (c) 2006, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Copyright (C) 2006-2008 Intel Corporation - * Author: Ashok Raj - * Author: Anil S Keshavamurthy - */ - -#ifndef _INTEL_IOMMU_H_ -#define _INTEL_IOMMU_H_ - -#include -#include -#include -#include "iova.h" -#include -#include -#include "dma_remapping.h" - -/* - * Intel IOMMU register specification per version 1.0 public spec. - */ - -#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ -#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ -#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ -#define DMAR_GCMD_REG 0x18 /* Global command register */ -#define DMAR_GSTS_REG 0x1c /* Global status register */ -#define DMAR_RTADDR_REG 0x20 /* Root entry table */ -#define DMAR_CCMD_REG 0x28 /* Context command reg */ -#define DMAR_FSTS_REG 0x34 /* Fault Status register */ -#define DMAR_FECTL_REG 0x38 /* Fault control register */ -#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ -#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ -#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ -#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ -#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ -#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ -#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ -#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ -#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ -#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ -#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ -#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ -#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ -#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ - -#define OFFSET_STRIDE (9) -/* -#define dmar_readl(dmar, reg) readl(dmar + reg) -#define dmar_readq(dmar, reg) ({ \ - u32 lo, hi; \ - lo = readl(dmar + reg); \ - hi = readl(dmar + reg + 4); \ - (((u64) hi) << 32) + lo; }) -*/ -static inline u64 dmar_readq(void __iomem *addr) -{ - u32 lo, hi; - lo = readl(addr); - hi = readl(addr + 4); - return (((u64) hi) << 32) + lo; -} - -static inline void dmar_writeq(void __iomem *addr, u64 val) -{ - writel((u32)val, addr); - writel((u32)(val >> 32), addr + 4); -} - -#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) -#define DMAR_VER_MINOR(v) ((v) & 0x0f) - -/* - * Decoding Capability Register - */ -#define cap_read_drain(c) (((c) >> 55) & 1) -#define cap_write_drain(c) (((c) >> 54) & 1) -#define cap_max_amask_val(c) (((c) >> 48) & 0x3f) -#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) -#define cap_pgsel_inv(c) (((c) >> 39) & 1) - -#define cap_super_page_val(c) (((c) >> 34) & 0xf) -#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ - * OFFSET_STRIDE) + 21) - -#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) -#define cap_max_fault_reg_offset(c) \ - (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16) - -#define cap_zlr(c) (((c) >> 22) & 1) -#define cap_isoch(c) (((c) >> 23) & 1) -#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) -#define cap_sagaw(c) (((c) >> 8) & 0x1f) -#define cap_caching_mode(c) (((c) >> 7) & 1) -#define cap_phmr(c) (((c) >> 6) & 1) -#define cap_plmr(c) (((c) >> 5) & 1) -#define cap_rwbf(c) (((c) >> 4) & 1) -#define cap_afl(c) (((c) >> 3) & 1) -#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) -/* - * Extended Capability Register - */ - -#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) -#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) -#define ecap_max_iotlb_offset(e) \ - (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) -#define ecap_coherent(e) ((e) & 0x1) -#define ecap_qis(e) ((e) & 0x2) -#define ecap_eim_support(e) ((e >> 4) & 0x1) -#define ecap_ir_support(e) ((e >> 3) & 0x1) -#define ecap_max_handle_mask(e) ((e >> 20) & 0xf) - - -/* IOTLB_REG */ -#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) -#define DMA_TLB_DSI_FLUSH (((u64)2) << 60) -#define DMA_TLB_PSI_FLUSH (((u64)3) << 60) -#define DMA_TLB_IIRG(type) ((type >> 60) & 7) -#define DMA_TLB_IAIG(val) (((val) >> 57) & 7) -#define DMA_TLB_READ_DRAIN (((u64)1) << 49) -#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) -#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) -#define DMA_TLB_IVT (((u64)1) << 63) -#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) -#define DMA_TLB_MAX_SIZE (0x3f) - -/* INVALID_DESC */ -#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) -#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) -#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) -#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) -#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) -#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) -#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) -#define DMA_ID_TLB_ADDR(addr) (addr) -#define DMA_ID_TLB_ADDR_MASK(mask) (mask) - -/* PMEN_REG */ -#define DMA_PMEN_EPM (((u32)1)<<31) -#define DMA_PMEN_PRS (((u32)1)<<0) - -/* GCMD_REG */ -#define DMA_GCMD_TE (((u32)1) << 31) -#define DMA_GCMD_SRTP (((u32)1) << 30) -#define DMA_GCMD_SFL (((u32)1) << 29) -#define DMA_GCMD_EAFL (((u32)1) << 28) -#define DMA_GCMD_WBF (((u32)1) << 27) -#define DMA_GCMD_QIE (((u32)1) << 26) -#define DMA_GCMD_SIRTP (((u32)1) << 24) -#define DMA_GCMD_IRE (((u32) 1) << 25) - -/* GSTS_REG */ -#define DMA_GSTS_TES (((u32)1) << 31) -#define DMA_GSTS_RTPS (((u32)1) << 30) -#define DMA_GSTS_FLS (((u32)1) << 29) -#define DMA_GSTS_AFLS (((u32)1) << 28) -#define DMA_GSTS_WBFS (((u32)1) << 27) -#define DMA_GSTS_QIES (((u32)1) << 26) -#define DMA_GSTS_IRTPS (((u32)1) << 24) -#define DMA_GSTS_IRES (((u32)1) << 25) - -/* CCMD_REG */ -#define DMA_CCMD_ICC (((u64)1) << 63) -#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) -#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) -#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) -#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) -#define DMA_CCMD_MASK_NOBIT 0 -#define DMA_CCMD_MASK_1BIT 1 -#define DMA_CCMD_MASK_2BIT 2 -#define DMA_CCMD_MASK_3BIT 3 -#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) -#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) - -/* FECTL_REG */ -#define DMA_FECTL_IM (((u32)1) << 31) - -/* FSTS_REG */ -#define DMA_FSTS_PPF ((u32)2) -#define DMA_FSTS_PFO ((u32)1) -#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) - -/* FRCD_REG, 32 bits access */ -#define DMA_FRCD_F (((u32)1) << 31) -#define dma_frcd_type(d) ((d >> 30) & 1) -#define dma_frcd_fault_reason(c) (c & 0xff) -#define dma_frcd_source_id(c) (c & 0xffff) -#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ - -#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ - -#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ -{\ - cycles_t start_time = get_cycles();\ - while (1) {\ - sts = op (iommu->reg + offset);\ - if (cond)\ - break;\ - if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ - panic("DMAR hardware is malfunctioning\n");\ - cpu_relax();\ - }\ -} - -#define QI_LENGTH 256 /* queue length */ - -enum { - QI_FREE, - QI_IN_USE, - QI_DONE -}; - -#define QI_CC_TYPE 0x1 -#define QI_IOTLB_TYPE 0x2 -#define QI_DIOTLB_TYPE 0x3 -#define QI_IEC_TYPE 0x4 -#define QI_IWD_TYPE 0x5 - -#define QI_IEC_SELECTIVE (((u64)1) << 4) -#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) -#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) - -#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) -#define QI_IWD_STATUS_WRITE (((u64)1) << 5) - -struct qi_desc { - u64 low, high; -}; - -struct q_inval { - spinlock_t q_lock; - struct qi_desc *desc; /* invalidation queue */ - int *desc_status; /* desc status */ - int free_head; /* first free entry */ - int free_tail; /* last free entry */ - int free_cnt; -}; - -#ifdef CONFIG_INTR_REMAP -/* 1MB - maximum possible interrupt remapping table size */ -#define INTR_REMAP_PAGE_ORDER 8 -#define INTR_REMAP_TABLE_REG_SIZE 0xf - -#define INTR_REMAP_TABLE_ENTRIES 65536 - -struct ir_table { - struct irte *base; -}; -#endif - -struct intel_iommu { - void __iomem *reg; /* Pointer to hardware regs, virtual addr */ - u64 cap; - u64 ecap; - int seg; - u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ - spinlock_t register_lock; /* protect register handling */ - int seq_id; /* sequence id of the iommu */ - -#ifdef CONFIG_DMAR - unsigned long *domain_ids; /* bitmap of domains */ - struct dmar_domain **domains; /* ptr to domains */ - spinlock_t lock; /* protect context, domain ids */ - struct root_entry *root_entry; /* virtual address */ - - unsigned int irq; - unsigned char name[7]; /* Device Name */ - struct msi_msg saved_msg; - struct sys_device sysdev; -#endif - struct q_inval *qi; /* Queued invalidation info */ -#ifdef CONFIG_INTR_REMAP - struct ir_table *ir_table; /* Interrupt remapping info */ -#endif -}; - -static inline void __iommu_flush_cache( - struct intel_iommu *iommu, void *addr, int size) -{ - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(addr, size); -} - -extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); - -extern int alloc_iommu(struct dmar_drhd_unit *drhd); -extern void free_iommu(struct intel_iommu *iommu); -extern int dmar_enable_qi(struct intel_iommu *iommu); -extern void qi_global_iec(struct intel_iommu *iommu); - -extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -#endif diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index bb642cc5e18..738d4c89581 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -4,7 +4,7 @@ #include #include #include -#include "intel-iommu.h" +#include #include "intr_remapping.h" static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h index 05f2635bbe4..ca48f0df8ac 100644 --- a/drivers/pci/intr_remapping.h +++ b/drivers/pci/intr_remapping.h @@ -1,4 +1,4 @@ -#include "intel-iommu.h" +#include struct ioapic_scope { struct intel_iommu *iommu; diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 3ef4ac06431..2287116e982 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -7,7 +7,7 @@ * Author: Anil S Keshavamurthy */ -#include "iova.h" +#include void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h deleted file mode 100644 index 228f6c94b69..00000000000 --- a/drivers/pci/iova.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2006, Intel Corporation. - * - * This file is released under the GPLv2. - * - * Copyright (C) 2006-2008 Intel Corporation - * Author: Anil S Keshavamurthy - * - */ - -#ifndef _IOVA_H_ -#define _IOVA_H_ - -#include -#include -#include -#include - -/* IO virtual address start page frame number */ -#define IOVA_START_PFN (1) - -/* iova structure */ -struct iova { - struct rb_node node; - unsigned long pfn_hi; /* IOMMU dish out addr hi */ - unsigned long pfn_lo; /* IOMMU dish out addr lo */ -}; - -/* holds all the iova translations for a domain */ -struct iova_domain { - spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */ - spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ - struct rb_root rbroot; /* iova domain rbtree root */ - struct rb_node *cached32_node; /* Save last alloced node */ - unsigned long dma_32bit_pfn; -}; - -struct iova *alloc_iova_mem(void); -void free_iova_mem(struct iova *iova); -void free_iova(struct iova_domain *iovad, unsigned long pfn); -void __free_iova(struct iova_domain *iovad, struct iova *iova); -struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn, - bool size_aligned); -struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, - unsigned long pfn_hi); -void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); -struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); -void put_iova_domain(struct iova_domain *iovad); - -#endif diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h new file mode 100644 index 00000000000..bff5c65f81d --- /dev/null +++ b/include/linux/dma_remapping.h @@ -0,0 +1,157 @@ +#ifndef _DMA_REMAPPING_H +#define _DMA_REMAPPING_H + +/* + * We need a fixed PAGE_SIZE of 4K irrespective of + * arch PAGE_SIZE for IOMMU page tables. + */ +#define PAGE_SHIFT_4K (12) +#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) +#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) +#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) + +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) +#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) +#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) + + +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 val; + u64 rsvd1; +}; +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +static inline bool root_present(struct root_entry *root) +{ + return (root->val & 1); +} +static inline void set_root_present(struct root_entry *root) +{ + root->val |= 1; +} +static inline void set_root_value(struct root_entry *root, unsigned long value) +{ + root->val |= value & PAGE_MASK_4K; +} + +struct context_entry; +static inline struct context_entry * +get_context_addr_from_root(struct root_entry *root) +{ + return (struct context_entry *) + (root_present(root)?phys_to_virt( + root->val & PAGE_MASK_4K): + NULL); +} + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; +#define context_present(c) ((c).lo & 1) +#define context_fault_disable(c) (((c).lo >> 1) & 1) +#define context_translation_type(c) (((c).lo >> 2) & 3) +#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_width(c) ((c).hi & 7) +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) + +#define context_set_present(c) do {(c).lo |= 1;} while (0) +#define context_set_fault_enable(c) \ + do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) +#define context_set_translation_type(c, val) \ + do { \ + (c).lo &= (((u64)-1) << 4) | 3; \ + (c).lo |= ((val) & 3) << 2; \ + } while (0) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define context_set_address_root(c, val) \ + do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) +#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) +#define context_set_domain_id(c, val) \ + do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) + +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-11: available + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; +#define dma_clear_pte(p) do {(p).val = 0;} while (0) + +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) + +#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) +#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) +#define dma_set_pte_prot(p, prot) \ + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_set_pte_addr(p, addr) do {\ + (p).val |= ((addr) & PAGE_MASK_4K); } while (0) +#define dma_pte_present(p) (((p).val & 3) != 0) + +struct intel_iommu; + +struct dmar_domain { + int id; /* domain id */ + struct intel_iommu *iommu; /* back pointer to owning iommu */ + + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + spinlock_t mapping_lock; /* page table lock */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 + int flags; +}; + +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus numer */ + u8 devfn; /* PCI devfn number */ + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct dmar_domain *domain; /* pointer to domain */ +}; + +extern int init_dmars(void); +extern void free_dmar_iommu(struct intel_iommu *iommu); + +extern int dmar_disabled; + +#ifndef CONFIG_DMAR_GFX_WA +static inline void iommu_prepare_gfx_mapping(void) +{ + return; +} +#endif /* !CONFIG_DMAR_GFX_WA */ + +#endif diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h new file mode 100644 index 00000000000..2e117f30a76 --- /dev/null +++ b/include/linux/intel-iommu.h @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Ashok Raj + * Author: Anil S Keshavamurthy + */ + +#ifndef _INTEL_IOMMU_H_ +#define _INTEL_IOMMU_H_ + +#include +#include +#include +#include +#include +#include +#include + +/* + * Intel IOMMU register specification per version 1.0 public spec. + */ + +#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ +#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ +#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ +#define DMAR_GCMD_REG 0x18 /* Global command register */ +#define DMAR_GSTS_REG 0x1c /* Global status register */ +#define DMAR_RTADDR_REG 0x20 /* Root entry table */ +#define DMAR_CCMD_REG 0x28 /* Context command reg */ +#define DMAR_FSTS_REG 0x34 /* Fault Status register */ +#define DMAR_FECTL_REG 0x38 /* Fault control register */ +#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ +#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ +#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ +#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ +#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ +#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ +#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ +#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ +#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ +#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ +#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ + +#define OFFSET_STRIDE (9) +/* +#define dmar_readl(dmar, reg) readl(dmar + reg) +#define dmar_readq(dmar, reg) ({ \ + u32 lo, hi; \ + lo = readl(dmar + reg); \ + hi = readl(dmar + reg + 4); \ + (((u64) hi) << 32) + lo; }) +*/ +static inline u64 dmar_readq(void __iomem *addr) +{ + u32 lo, hi; + lo = readl(addr); + hi = readl(addr + 4); + return (((u64) hi) << 32) + lo; +} + +static inline void dmar_writeq(void __iomem *addr, u64 val) +{ + writel((u32)val, addr); + writel((u32)(val >> 32), addr + 4); +} + +#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) +#define DMAR_VER_MINOR(v) ((v) & 0x0f) + +/* + * Decoding Capability Register + */ +#define cap_read_drain(c) (((c) >> 55) & 1) +#define cap_write_drain(c) (((c) >> 54) & 1) +#define cap_max_amask_val(c) (((c) >> 48) & 0x3f) +#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) +#define cap_pgsel_inv(c) (((c) >> 39) & 1) + +#define cap_super_page_val(c) (((c) >> 34) & 0xf) +#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ + * OFFSET_STRIDE) + 21) + +#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) +#define cap_max_fault_reg_offset(c) \ + (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16) + +#define cap_zlr(c) (((c) >> 22) & 1) +#define cap_isoch(c) (((c) >> 23) & 1) +#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) +#define cap_sagaw(c) (((c) >> 8) & 0x1f) +#define cap_caching_mode(c) (((c) >> 7) & 1) +#define cap_phmr(c) (((c) >> 6) & 1) +#define cap_plmr(c) (((c) >> 5) & 1) +#define cap_rwbf(c) (((c) >> 4) & 1) +#define cap_afl(c) (((c) >> 3) & 1) +#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) +/* + * Extended Capability Register + */ + +#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) +#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) +#define ecap_max_iotlb_offset(e) \ + (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) +#define ecap_coherent(e) ((e) & 0x1) +#define ecap_qis(e) ((e) & 0x2) +#define ecap_eim_support(e) ((e >> 4) & 0x1) +#define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_max_handle_mask(e) ((e >> 20) & 0xf) + + +/* IOTLB_REG */ +#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) +#define DMA_TLB_DSI_FLUSH (((u64)2) << 60) +#define DMA_TLB_PSI_FLUSH (((u64)3) << 60) +#define DMA_TLB_IIRG(type) ((type >> 60) & 7) +#define DMA_TLB_IAIG(val) (((val) >> 57) & 7) +#define DMA_TLB_READ_DRAIN (((u64)1) << 49) +#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) +#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) +#define DMA_TLB_IVT (((u64)1) << 63) +#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_TLB_MAX_SIZE (0x3f) + +/* INVALID_DESC */ +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) +#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) +#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) +#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) +#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_ID_TLB_ADDR(addr) (addr) +#define DMA_ID_TLB_ADDR_MASK(mask) (mask) + +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1)<<31) +#define DMA_PMEN_PRS (((u32)1)<<0) + +/* GCMD_REG */ +#define DMA_GCMD_TE (((u32)1) << 31) +#define DMA_GCMD_SRTP (((u32)1) << 30) +#define DMA_GCMD_SFL (((u32)1) << 29) +#define DMA_GCMD_EAFL (((u32)1) << 28) +#define DMA_GCMD_WBF (((u32)1) << 27) +#define DMA_GCMD_QIE (((u32)1) << 26) +#define DMA_GCMD_SIRTP (((u32)1) << 24) +#define DMA_GCMD_IRE (((u32) 1) << 25) + +/* GSTS_REG */ +#define DMA_GSTS_TES (((u32)1) << 31) +#define DMA_GSTS_RTPS (((u32)1) << 30) +#define DMA_GSTS_FLS (((u32)1) << 29) +#define DMA_GSTS_AFLS (((u32)1) << 28) +#define DMA_GSTS_WBFS (((u32)1) << 27) +#define DMA_GSTS_QIES (((u32)1) << 26) +#define DMA_GSTS_IRTPS (((u32)1) << 24) +#define DMA_GSTS_IRES (((u32)1) << 25) + +/* CCMD_REG */ +#define DMA_CCMD_ICC (((u64)1) << 63) +#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) +#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) +#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) +#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) +#define DMA_CCMD_MASK_NOBIT 0 +#define DMA_CCMD_MASK_1BIT 1 +#define DMA_CCMD_MASK_2BIT 2 +#define DMA_CCMD_MASK_3BIT 3 +#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) +#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) + +/* FECTL_REG */ +#define DMA_FECTL_IM (((u32)1) << 31) + +/* FSTS_REG */ +#define DMA_FSTS_PPF ((u32)2) +#define DMA_FSTS_PFO ((u32)1) +#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) + +/* FRCD_REG, 32 bits access */ +#define DMA_FRCD_F (((u32)1) << 31) +#define dma_frcd_type(d) ((d >> 30) & 1) +#define dma_frcd_fault_reason(c) (c & 0xff) +#define dma_frcd_source_id(c) (c & 0xffff) +#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ + +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ + +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ +{\ + cycles_t start_time = get_cycles();\ + while (1) {\ + sts = op (iommu->reg + offset);\ + if (cond)\ + break;\ + if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ + panic("DMAR hardware is malfunctioning\n");\ + cpu_relax();\ + }\ +} + +#define QI_LENGTH 256 /* queue length */ + +enum { + QI_FREE, + QI_IN_USE, + QI_DONE +}; + +#define QI_CC_TYPE 0x1 +#define QI_IOTLB_TYPE 0x2 +#define QI_DIOTLB_TYPE 0x3 +#define QI_IEC_TYPE 0x4 +#define QI_IWD_TYPE 0x5 + +#define QI_IEC_SELECTIVE (((u64)1) << 4) +#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) +#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) + +#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) +#define QI_IWD_STATUS_WRITE (((u64)1) << 5) + +struct qi_desc { + u64 low, high; +}; + +struct q_inval { + spinlock_t q_lock; + struct qi_desc *desc; /* invalidation queue */ + int *desc_status; /* desc status */ + int free_head; /* first free entry */ + int free_tail; /* last free entry */ + int free_cnt; +}; + +#ifdef CONFIG_INTR_REMAP +/* 1MB - maximum possible interrupt remapping table size */ +#define INTR_REMAP_PAGE_ORDER 8 +#define INTR_REMAP_TABLE_REG_SIZE 0xf + +#define INTR_REMAP_TABLE_ENTRIES 65536 + +struct ir_table { + struct irte *base; +}; +#endif + +struct intel_iommu { + void __iomem *reg; /* Pointer to hardware regs, virtual addr */ + u64 cap; + u64 ecap; + int seg; + u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ + spinlock_t register_lock; /* protect register handling */ + int seq_id; /* sequence id of the iommu */ + +#ifdef CONFIG_DMAR + unsigned long *domain_ids; /* bitmap of domains */ + struct dmar_domain **domains; /* ptr to domains */ + spinlock_t lock; /* protect context, domain ids */ + struct root_entry *root_entry; /* virtual address */ + + unsigned int irq; + unsigned char name[7]; /* Device Name */ + struct msi_msg saved_msg; + struct sys_device sysdev; +#endif + struct q_inval *qi; /* Queued invalidation info */ +#ifdef CONFIG_INTR_REMAP + struct ir_table *ir_table; /* Interrupt remapping info */ +#endif +}; + +static inline void __iommu_flush_cache( + struct intel_iommu *iommu, void *addr, int size) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(addr, size); +} + +extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); + +extern int alloc_iommu(struct dmar_drhd_unit *drhd); +extern void free_iommu(struct intel_iommu *iommu); +extern int dmar_enable_qi(struct intel_iommu *iommu); +extern void qi_global_iec(struct intel_iommu *iommu); + +extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); + +void intel_iommu_domain_exit(struct dmar_domain *domain); +struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev); +int intel_iommu_context_mapping(struct dmar_domain *domain, + struct pci_dev *pdev); +int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot); +void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn); +struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev); +u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); + +#ifdef CONFIG_DMAR +int intel_iommu_found(void); +#else /* CONFIG_DMAR */ +static inline int intel_iommu_found(void) +{ + return 0; +} +#endif /* CONFIG_DMAR */ + +#endif diff --git a/include/linux/iova.h b/include/linux/iova.h new file mode 100644 index 00000000000..228f6c94b69 --- /dev/null +++ b/include/linux/iova.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This file is released under the GPLv2. + * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Anil S Keshavamurthy + * + */ + +#ifndef _IOVA_H_ +#define _IOVA_H_ + +#include +#include +#include +#include + +/* IO virtual address start page frame number */ +#define IOVA_START_PFN (1) + +/* iova structure */ +struct iova { + struct rb_node node; + unsigned long pfn_hi; /* IOMMU dish out addr hi */ + unsigned long pfn_lo; /* IOMMU dish out addr lo */ +}; + +/* holds all the iova translations for a domain */ +struct iova_domain { + spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */ + spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ + struct rb_root rbroot; /* iova domain rbtree root */ + struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long dma_32bit_pfn; +}; + +struct iova *alloc_iova_mem(void); +void free_iova_mem(struct iova *iova); +void free_iova(struct iova_domain *iovad, unsigned long pfn); +void __free_iova(struct iova_domain *iovad, struct iova *iova); +struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn, + bool size_aligned); +struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, + unsigned long pfn_hi); +void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); +void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); +struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); +void put_iova_domain(struct iova_domain *iovad); + +#endif -- cgit v1.2.3-70-g09d2