From 1ef3e2bc04223ff956dc62abaf2dff1f3322a431 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 26 Feb 2014 11:38:36 -0700 Subject: vfio/iommu_type1: Multi-IOMMU domain support We currently have a problem that we cannot support advanced features of an IOMMU domain (ex. IOMMU_CACHE), because we have no guarantee that those features will be supported by all of the hardware units involved with the domain over its lifetime. For instance, the Intel VT-d architecture does not require that all DRHDs support snoop control. If we create a domain based on a device behind a DRHD that does support snoop control and enable SNP support via the IOMMU_CACHE mapping option, we cannot then add a device behind a DRHD which does not support snoop control or we'll get reserved bit faults from the SNP bit in the pagetables. To add to the complexity, we can't know the properties of a domain until a device is attached. We could pass this problem off to userspace and require that a separate vfio container be used, but we don't know how to handle page accounting in that case. How do we know that a page pinned in one container is the same page as a different container and avoid double billing the user for the page. The solution is therefore to support multiple IOMMU domains per container. In the majority of cases, only one domain will be required since hardware is typically consistent within a system. However, this provides us the ability to validate compatibility of domains and support mixed environments where page table flags can be different between domains. To do this, our DMA tracking needs to change. We currently try to coalesce user mappings into as few tracking entries as possible. The problem then becomes that we lose granularity of user mappings. We've never guaranteed that a user is able to unmap at a finer granularity than the original mapping, but we must honor the granularity of the original mapping. This coalescing code is therefore removed, allowing only unmaps covering complete maps. The change in accounting is fairly small here, a typical QEMU VM will start out with roughly a dozen entries, so it's arguable if this coalescing was ever needed. We also move IOMMU domain creation to the point where a group is attached to the container. An interesting side-effect of this is that we now have access to the device at the time of domain creation and can probe the devices within the group to determine the bus_type. This finally makes vfio_iommu_type1 completely device/bus agnostic. In fact, each IOMMU domain can host devices on different buses managed by different physical IOMMUs, and present a single DMA mapping interface to the user. When a new domain is created, mappings are replayed to bring the IOMMU pagetables up to the state of the current container. And of course, DMA mapping and unmapping automatically traverse all of the configured IOMMU domains. Signed-off-by: Alex Williamson Cc: Varun Sethi --- include/uapi/linux/vfio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 0fd47f5bc14..460fdf2e26f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -23,6 +23,7 @@ #define VFIO_TYPE1_IOMMU 1 #define VFIO_SPAPR_TCE_IOMMU 2 +#define VFIO_TYPE1v2_IOMMU 3 /* * The IOCTL interface is designed for extensibility by embedding the -- cgit v1.2.3-70-g09d2 From aa429318279b90192f35a97e9ccdc1e83b3a9624 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 26 Feb 2014 11:38:37 -0700 Subject: vfio/type1: Add extension to test DMA cache coherence of IOMMU Now that the type1 IOMMU backend can support IOMMU_CACHE, we need to be able to test whether coherency is currently enforced. Add an extension for this. Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 21 +++++++++++++++++++++ include/uapi/linux/vfio.h | 5 +++++ 2 files changed, 26 insertions(+) (limited to 'include/uapi') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 8c7bb9befda..1f90344d3e2 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -867,6 +867,23 @@ static void vfio_iommu_type1_release(void *iommu_data) kfree(iommu); } +static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) +{ + struct vfio_domain *domain; + int ret = 1; + + mutex_lock(&iommu->lock); + list_for_each_entry(domain, &iommu->domain_list, next) { + if (!(domain->prot & IOMMU_CACHE)) { + ret = 0; + break; + } + } + mutex_unlock(&iommu->lock); + + return ret; +} + static long vfio_iommu_type1_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -878,6 +895,10 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, case VFIO_TYPE1_IOMMU: case VFIO_TYPE1v2_IOMMU: return 1; + case VFIO_DMA_CC_IOMMU: + if (!iommu) + return 0; + return vfio_domains_have_iommu_cache(iommu); default: return 0; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 460fdf2e26f..cb9023d4f06 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -24,6 +24,11 @@ #define VFIO_TYPE1_IOMMU 1 #define VFIO_SPAPR_TCE_IOMMU 2 #define VFIO_TYPE1v2_IOMMU 3 +/* + * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping). This + * capability is subject to change as groups are added or removed. + */ +#define VFIO_DMA_CC_IOMMU 4 /* * The IOCTL interface is designed for extensibility by embedding the -- cgit v1.2.3-70-g09d2