diff options
-rw-r--r-- | drivers/pci/intel-iommu.c | 325 |
-rw-r--r-- | drivers/pci/iova.c | 63 |
-rw-r--r-- | drivers/pci/iova.h | 3 |
3 files changed, 231 insertions, 160 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 4cca5b939e0..dab329f0158 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -665,24 +665,10 @@ static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did, non_present_entry_flush); } -static int iommu_get_alignment(u64 base, unsigned int size) -{ - int t = 0; - u64 end; - - end = base + size - 1; - while (base != end) { - t++; - base >>= 1; - end >>= 1; - } - return t; -} - static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int pages, int non_present_entry_flush) { - unsigned int align; + unsigned int mask; BUG_ON(addr & (~PAGE_MASK_4K)); BUG_ON(pages == 0); @@ -696,16 +682,13 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, * PSI requires page size to be 2 ^ x, and the base address is naturally * aligned to the size */ - align = iommu_get_alignment(addr >> PAGE_SHIFT_4K, pages); + mask = ilog2(__roundup_pow_of_two(pages)); /* Fallback to domain selective flush if size is too big */ - if (align > cap_max_amask_val(iommu->cap)) + if (mask > cap_max_amask_val(iommu->cap)) return iommu_flush_iotlb_dsi(iommu, did, non_present_entry_flush); - addr >>= PAGE_SHIFT_4K + align; - addr <<= PAGE_SHIFT_4K + align; - - return __iommu_flush_iotlb(iommu, did, addr, align, + return __iommu_flush_iotlb(iommu, did, addr, mask, DMA_TLB_PSI_FLUSH, non_present_entry_flush); } @@ -1772,78 +1755,103 @@ static inline u64 aligned_size(u64 host_addr, size_t size) } struct iova * -iommu_alloc_iova(struct dmar_domain *domain, void *host_addr, size_t size, - u64 start, u64 end) +iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end) { - u64 start_addr; struct iova *piova; /* Make sure it's in range */ - if ((start > DOMAIN_MAX_ADDR(domain->gaw)) || end < start) - return NULL; - end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end); - start_addr = PAGE_ALIGN_4K(start); - size = aligned_size((u64)host_addr, size); - 
if (!size || (start_addr + size > end)) + if (!size || (IOVA_START_ADDR + size > end)) return NULL; piova = alloc_iova(&domain->iovad, - size >> PAGE_SHIFT_4K, IOVA_PFN(end)); - + size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1); return piova; } -static dma_addr_t __intel_map_single(struct device *dev, void *addr, - size_t size, int dir, u64 *flush_addr, unsigned int *flush_size) +static struct iova * +__intel_alloc_iova(struct device *dev, struct dmar_domain *domain, + size_t size) { - struct dmar_domain *domain; struct pci_dev *pdev = to_pci_dev(dev); - int ret; - int prot = 0; struct iova *iova = NULL; - u64 start_addr; - - addr = (void *)virt_to_phys(addr); - - domain = get_domain_for_dev(pdev, - DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) { - printk(KERN_ERR - "Allocating domain for %s failed", pci_name(pdev)); - return 0; - } - - start_addr = IOVA_START_ADDR; if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) { - iova = iommu_alloc_iova(domain, addr, size, start_addr, - pdev->dma_mask); + iova = iommu_alloc_iova(domain, size, pdev->dma_mask); } else { /* * First try to allocate an io virtual address in * DMA_32BIT_MASK and if that fails then try allocating * from higer range */ - iova = iommu_alloc_iova(domain, addr, size, start_addr, - DMA_32BIT_MASK); + iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK); if (!iova) - iova = iommu_alloc_iova(domain, addr, size, start_addr, - pdev->dma_mask); + iova = iommu_alloc_iova(domain, size, pdev->dma_mask); } if (!iova) { printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); + return NULL; + } + + return iova; +} + +static struct dmar_domain * +get_valid_domain_for_dev(struct pci_dev *pdev) +{ + struct dmar_domain *domain; + int ret; + + domain = get_domain_for_dev(pdev, + DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (!domain) { + printk(KERN_ERR + "Allocating domain for %s failed", pci_name(pdev)); return 0; } /* make sure context mapping is ok */ if (unlikely(!domain_context_mapped(domain, pdev))) { ret = 
domain_context_mapping(domain, pdev); - if (ret) - goto error; + if (ret) { + printk(KERN_ERR + "Domain context map for %s failed", + pci_name(pdev)); + return 0; + } } + return domain; +} + +static dma_addr_t intel_map_single(struct device *hwdev, void *addr, + size_t size, int dir) +{ + struct pci_dev *pdev = to_pci_dev(hwdev); + int ret; + struct dmar_domain *domain; + unsigned long start_addr; + struct iova *iova; + int prot = 0; + + BUG_ON(dir == DMA_NONE); + if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) + return virt_to_bus(addr); + + domain = get_valid_domain_for_dev(pdev); + if (!domain) + return 0; + + addr = (void *)virt_to_phys(addr); + size = aligned_size((u64)addr, size); + + iova = __intel_alloc_iova(hwdev, domain, size); + if (!iova) + goto error; + + start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + /* * Check if DMAR supports zero-length reads on write only * mappings.. @@ -1859,101 +1867,65 @@ static dma_addr_t __intel_map_single(struct device *dev, void *addr, * might have two guest_addr mapping to the same host addr, but this * is not a big problem */ - ret = domain_page_mapping(domain, iova->pfn_lo << PAGE_SHIFT_4K, - ((u64)addr) & PAGE_MASK_4K, - (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K, prot); + ret = domain_page_mapping(domain, start_addr, + ((u64)addr) & PAGE_MASK_4K, size, prot); if (ret) goto error; pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n", pci_name(pdev), size, (u64)addr, - (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K, - (u64)(iova->pfn_lo << PAGE_SHIFT_4K), dir); + size, (u64)start_addr, dir); + + /* it's a non-present to present mapping */ + ret = iommu_flush_iotlb_psi(domain->iommu, domain->id, + start_addr, size >> PAGE_SHIFT_4K, 1); + if (ret) + iommu_flush_write_buffer(domain->iommu); + + return (start_addr + ((u64)addr & (~PAGE_MASK_4K))); - *flush_addr = iova->pfn_lo << PAGE_SHIFT_4K; - *flush_size = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K; - return (iova->pfn_lo << PAGE_SHIFT_4K) 
+ ((u64)addr & (~PAGE_MASK_4K)); error: - __free_iova(&domain->iovad, iova); + if (iova) + __free_iova(&domain->iovad, iova); printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n", pci_name(pdev), size, (u64)addr, dir); return 0; } -static dma_addr_t intel_map_single(struct device *hwdev, void *addr, +static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, int dir) { - struct pci_dev *pdev = to_pci_dev(hwdev); - dma_addr_t ret; - struct dmar_domain *domain; - u64 flush_addr; - unsigned int flush_size; - - BUG_ON(dir == DMA_NONE); - if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) - return virt_to_bus(addr); - - ret = __intel_map_single(hwdev, addr, size, - dir, &flush_addr, &flush_size); - if (ret) { - domain = find_domain(pdev); - /* it's a non-present to present mapping */ - if (iommu_flush_iotlb_psi(domain->iommu, domain->id, - flush_addr, flush_size >> PAGE_SHIFT_4K, 1)) - iommu_flush_write_buffer(domain->iommu); - } - return ret; -} - -static void __intel_unmap_single(struct device *dev, dma_addr_t dev_addr, - size_t size, int dir, u64 *flush_addr, unsigned int *flush_size) -{ - struct dmar_domain *domain; struct pci_dev *pdev = to_pci_dev(dev); + struct dmar_domain *domain; + unsigned long start_addr; struct iova *iova; + if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) + return; domain = find_domain(pdev); BUG_ON(!domain); iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); - if (!iova) { - *flush_size = 0; + if (!iova) return; - } - pr_debug("Device %s unmapping: %lx@%llx\n", - pci_name(pdev), - (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K, - (u64)(iova->pfn_lo << PAGE_SHIFT_4K)); - - *flush_addr = iova->pfn_lo << PAGE_SHIFT_4K; - *flush_size = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K; - /* clear the whole page, not just dev_addr - (dev_addr + size) */ - dma_pte_clear_range(domain, *flush_addr, *flush_addr + *flush_size); - /* free page tables */ - dma_pte_free_pagetable(domain, *flush_addr, *flush_addr 
+ *flush_size); - /* free iova */ - __free_iova(&domain->iovad, iova); -} -static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, - size_t size, int dir) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct dmar_domain *domain; - u64 flush_addr; - unsigned int flush_size; + start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + size = aligned_size((u64)dev_addr, size); - if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) - return; + pr_debug("Device %s unmapping: %lx@%llx\n", + pci_name(pdev), size, (u64)start_addr); - domain = find_domain(pdev); - __intel_unmap_single(dev, dev_addr, size, - dir, &flush_addr, &flush_size); - if (flush_size == 0) - return; - if (iommu_flush_iotlb_psi(domain->iommu, domain->id, flush_addr, - flush_size >> PAGE_SHIFT_4K, 0)) + /* clear the whole page */ + dma_pte_clear_range(domain, start_addr, start_addr + size); + /* free page tables */ + dma_pte_free_pagetable(domain, start_addr, start_addr + size); + + if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, + size >> PAGE_SHIFT_4K, 0)) iommu_flush_write_buffer(domain->iommu); + + /* free iova */ + __free_iova(&domain->iovad, iova); } static void * intel_alloc_coherent(struct device *hwdev, size_t size, @@ -1990,28 +1962,46 @@ static void intel_free_coherent(struct device *hwdev, size_t size, free_pages((unsigned long)vaddr, order); } +#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, int dir) { int i; struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; - u64 flush_addr; - unsigned int flush_size; + unsigned long start_addr; + struct iova *iova; + size_t size = 0; + void *addr; if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) return; domain = find_domain(pdev); - for (i = 0; i < nelems; i++, sg++) - __intel_unmap_single(hwdev, sg->dma_address, - sg->dma_length, dir, &flush_addr, &flush_size); - if (iommu_flush_iotlb_dsi(domain->iommu, 
domain->id, 0)) + iova = find_iova(&domain->iovad, IOVA_PFN(sg[0].dma_address)); + if (!iova) + return; + for (i = 0; i < nelems; i++, sg++) { + addr = SG_ENT_VIRT_ADDRESS(sg); + size += aligned_size((u64)addr, sg->length); + } + + start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + + /* clear the whole page */ + dma_pte_clear_range(domain, start_addr, start_addr + size); + /* free page tables */ + dma_pte_free_pagetable(domain, start_addr, start_addr + size); + + if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, + size >> PAGE_SHIFT_4K, 0)) iommu_flush_write_buffer(domain->iommu); + + /* free iova */ + __free_iova(&domain->iovad, iova); } -#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) static int intel_nontranslate_map_sg(struct device *hddev, struct scatterlist *sg, int nelems, int dir) { @@ -2031,33 +2021,76 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sg, { void *addr; int i; - dma_addr_t dma_handle; struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; - u64 flush_addr; - unsigned int flush_size; + size_t size = 0; + int prot = 0; + size_t offset = 0; + struct iova *iova = NULL; + int ret; + struct scatterlist *orig_sg = sg; + unsigned long start_addr; BUG_ON(dir == DMA_NONE); if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO) return intel_nontranslate_map_sg(hwdev, sg, nelems, dir); + domain = get_valid_domain_for_dev(pdev); + if (!domain) + return 0; + for (i = 0; i < nelems; i++, sg++) { addr = SG_ENT_VIRT_ADDRESS(sg); - dma_handle = __intel_map_single(hwdev, addr, - sg->length, dir, &flush_addr, &flush_size); - if (!dma_handle) { - intel_unmap_sg(hwdev, sg - i, i, dir); - sg[0].dma_length = 0; + addr = (void *)virt_to_phys(addr); + size += aligned_size((u64)addr, sg->length); + } + + iova = __intel_alloc_iova(hwdev, domain, size); + if (!iova) { + orig_sg->dma_length = 0; + return 0; + } + + /* + * Check if DMAR supports zero-length reads on write only + * mappings.. 
+ */ + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ + !cap_zlr(domain->iommu->cap)) + prot |= DMA_PTE_READ; + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + prot |= DMA_PTE_WRITE; + + start_addr = iova->pfn_lo << PAGE_SHIFT_4K; + offset = 0; + sg = orig_sg; + for (i = 0; i < nelems; i++, sg++) { + addr = SG_ENT_VIRT_ADDRESS(sg); + addr = (void *)virt_to_phys(addr); + size = aligned_size((u64)addr, sg->length); + ret = domain_page_mapping(domain, start_addr + offset, + ((u64)addr) & PAGE_MASK_4K, + size, prot); + if (ret) { + /* clear the page */ + dma_pte_clear_range(domain, start_addr, + start_addr + offset); + /* free page tables */ + dma_pte_free_pagetable(domain, start_addr, + start_addr + offset); + /* free iova */ + __free_iova(&domain->iovad, iova); return 0; } - sg->dma_address = dma_handle; + sg->dma_address = start_addr + offset + + ((u64)addr & (~PAGE_MASK_4K)); sg->dma_length = sg->length; + offset += size; } - domain = find_domain(pdev); - /* it's a non-present to present mapping */ - if (iommu_flush_iotlb_dsi(domain->iommu, domain->id, 1)) + if (iommu_flush_iotlb_psi(domain->iommu, domain->id, + start_addr, offset >> PAGE_SHIFT_4K, 1)) iommu_flush_write_buffer(domain->iommu); return nelems; } diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 717fafaa7e0..a84571c2936 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -57,12 +57,28 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) iovad->cached32_node = rb_next(&free->node); } -static int __alloc_iova_range(struct iova_domain *iovad, - unsigned long size, unsigned long limit_pfn, struct iova *new) +/* Computes the padding size required, to make the + * the start address naturally aligned on its size + */ +static int +iova_get_pad_size(int size, unsigned int limit_pfn) +{ + unsigned int pad_size = 0; + unsigned int order = ilog2(size); + + if (order) + pad_size = (limit_pfn + 1) % (1 << order); + + return pad_size; +} + +static int 
__alloc_iova_range(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn, struct iova *new, bool size_aligned) { struct rb_node *curr = NULL; unsigned long flags; unsigned long saved_pfn; + unsigned int pad_size = 0; /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); @@ -72,22 +88,32 @@ static int __alloc_iova_range(struct iova_domain *iovad, struct iova *curr_iova = container_of(curr, struct iova, node); if (limit_pfn < curr_iova->pfn_lo) goto move_left; - if (limit_pfn < curr_iova->pfn_hi) + else if (limit_pfn < curr_iova->pfn_hi) goto adjust_limit_pfn; - if ((curr_iova->pfn_hi + size) <= limit_pfn) - break; /* found a free slot */ + else { + if (size_aligned) + pad_size = iova_get_pad_size(size, limit_pfn); + if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn) + break; /* found a free slot */ + } adjust_limit_pfn: limit_pfn = curr_iova->pfn_lo - 1; move_left: curr = rb_prev(curr); } - if ((!curr) && !(IOVA_START_PFN + size <= limit_pfn)) { - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - return -ENOMEM; + if (!curr) { + if (size_aligned) + pad_size = iova_get_pad_size(size, limit_pfn); + if ((IOVA_START_PFN + size + pad_size) > limit_pfn) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return -ENOMEM; + } } - new->pfn_hi = limit_pfn; - new->pfn_lo = limit_pfn - size + 1; + + /* pfn_lo will point to size aligned address if size_aligned is set */ + new->pfn_lo = limit_pfn - (size + pad_size) + 1; + new->pfn_hi = new->pfn_lo + size - 1; spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return 0; @@ -119,12 +145,16 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova) * @iovad - iova domain in question * @size - size of page frames to allocate * @limit_pfn - max limit address + * @size_aligned - set if size_aligned address range is required * This function allocates an iova in the range limit_pfn to IOVA_START_PFN - * looking from limit_pfn instead from IOVA_START_PFN. 
+ * looking from limit_pfn instead from IOVA_START_PFN. If the size_aligned + * flag is set then the allocated address iova->pfn_lo will be naturally + * aligned on roundup_power_of_two(size). */ struct iova * alloc_iova(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn) + unsigned long limit_pfn, + bool size_aligned) { unsigned long flags; struct iova *new_iova; @@ -134,8 +164,15 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, if (!new_iova) return NULL; + /* If size aligned is set then round the size to + * to next power of two. + */ + if (size_aligned) + size = __roundup_pow_of_two(size); + spin_lock_irqsave(&iovad->iova_alloc_lock, flags); - ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova); + ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova, + size_aligned); if (ret) { spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h index 04c22070888..ae3028d5a94 100644 --- a/drivers/pci/iova.h +++ b/drivers/pci/iova.h @@ -51,7 +51,8 @@ void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); void __free_iova(struct iova_domain *iovad, struct iova *iova); struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn); + unsigned long limit_pfn, + bool size_aligned); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); |