> From: Jason Gunthorpe <[email protected]>
> Sent: Thursday, March 24, 2022 6:55 AM
>
> On Wed, Mar 23, 2022 at 05:34:18PM -0300, Jason Gunthorpe wrote:
>
> > Stated another way, any platform that wires dev_is_dma_coherent() to
> > true, like all x86 does, must support IOMMU_CACHE and report
> > IOMMU_CAP_CACHE_COHERENCY for every iommu_domain the platform
> supports. The platform obviously declares it supports this in order to
> support the in-kernel DMA API.
>
> That gives me a nice simple idea:
>
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index 3c6b95ad026829..8366884df4a030 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -8,6 +8,7 @@
>  #include <linux/pci.h>
>  #include <linux/irqdomain.h>
>  #include <linux/dma-iommu.h>
> +#include <linux/dma-map-ops.h>
>  
>  #include "iommufd_private.h"
>  
> @@ -61,6 +62,10 @@ struct iommufd_device *iommufd_bind_pci_device(int fd, struct pci_dev *pdev,
>  	struct iommu_group *group;
>  	int rc;
>  
> +	/* iommufd always uses IOMMU_CACHE */
> +	if (!dev_is_dma_coherent(&pdev->dev))
> +		return ERR_PTR(-EINVAL);
> +
>  	ictx = iommufd_fget(fd);
>  	if (!ictx)
>  		return ERR_PTR(-EINVAL);
> diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
> index 48149988c84bbc..3d6df1ffbf93e6 100644
> --- a/drivers/iommu/iommufd/ioas.c
> +++ b/drivers/iommu/iommufd/ioas.c
> @@ -129,7 +129,8 @@ static int conv_iommu_prot(u32 map_flags)
>  	 * We provide no manual cache coherency ioctls to userspace and most
>  	 * architectures make the CPU ops for cache flushing privileged.
>  	 * Therefore we require the underlying IOMMU to support CPU coherent
> -	 * operation.
> +	 * operation. Support for IOMMU_CACHE is enforced by the
> +	 * dev_is_dma_coherent() test during bind.
>  	 */
>  	iommu_prot = IOMMU_CACHE;
>  	if (map_flags & IOMMU_IOAS_MAP_WRITEABLE)
>
> Looking at it I would say all the places that test
> IOMMU_CAP_CACHE_COHERENCY can be replaced with dev_is_dma_coherent(),
> except for the one call in VFIO that supports the Intel no-snoop
> behavior.
>
> Then we can rename IOMMU_CAP_CACHE_COHERENCY to something like
> IOMMU_CAP_ENFORCE_CACHE_COHERENCY and just create a
> IOMMU_ENFORCE_CACHE prot flag for Intel IOMMU to use instead of
> abusing IOMMU_CACHE.
>
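For the non-VFIO call sites that conversion is mechanical. A sketch of one
such site (not compiled; example_attach_check() is a made-up name, while
dev_is_dma_coherent() and iommu_capable() are the real interfaces):

#include <linux/dma-map-ops.h>

static int example_attach_check(struct device *dev)
{
	/* was: if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) */
	if (!dev_is_dma_coherent(dev))
		return -EINVAL;
	return 0;
}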
Based on that, here is a quick tweak of the force-snoop part (not compiled).
Several notes:
- IOMMU_CAP_CACHE_COHERENCY is kept as it's checked in vfio's group
  attach interface. Removing it may require a group_is_dma_coherent();
- vdpa is not changed, as force-snoop today only matters for integrated
  GPUs, which are not managed by vdpa. Adding snoop support there is easy
  if it becomes necessary;
- vfio type1 reports the force-snoop status to KVM via VFIO_DMA_CC_IOMMU.
  For iommufd the compat layer may leverage that interface, but more
  thought is needed on whether the non-compat path can reuse it or needs
  a new channel between iommufd and KVM (see the sketch after this list).
  Per earlier discussions Paolo prefers to reuse.
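As a rough sketch of the reuse option (not compiled;
iommufd_vfio_check_extension() and iommufd_ioas_enforced_snoop() are
made-up names, only VFIO_DMA_CC_IOMMU is real), the compat layer could
answer the existing extension check so KVM's current plumbing keeps
working unmodified:

static long iommufd_vfio_check_extension(unsigned long type)
{
	switch (type) {
	case VFIO_DMA_CC_IOMMU:
		/*
		 * Mirror vfio type1: report 1 only if every attached
		 * domain enforces IOMMU_SNOOP.
		 */
		return iommufd_ioas_enforced_snoop();
	default:
		return 0;
	}
}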
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5b196cf..06cca04 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5110,7 +5110,8 @@ static int intel_iommu_map(struct iommu_domain *domain,
 		prot |= DMA_PTE_READ;
 	if (iommu_prot & IOMMU_WRITE)
 		prot |= DMA_PTE_WRITE;
-	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
+	/* nothing to do for IOMMU_CACHE */
+	if ((iommu_prot & IOMMU_SNOOP) && dmar_domain->iommu_snooping)
 		prot |= DMA_PTE_SNP;
 
 	max_addr = iova + size;
@@ -5236,6 +5237,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 static bool intel_iommu_capable(enum iommu_cap cap)
 {
 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
+		return true;
+	if (cap == IOMMU_CAP_FORCE_SNOOP)
 		return domain_update_iommu_snooping(NULL);
 	if (cap == IOMMU_CAP_INTR_REMAP)
 		return irq_remapping_enabled == 1;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 9394aa9..abc4cfe 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2270,6 +2270,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
 		domain->prot |= IOMMU_CACHE;
 
+	if (iommu_capable(bus, IOMMU_CAP_FORCE_SNOOP))
+		domain->prot |= IOMMU_SNOOP;
+
 	/*
 	 * Try to match an existing compatible domain. We don't want to
 	 * preclude an IOMMU driver supporting multiple bus_types and being
@@ -2611,14 +2614,14 @@ static void vfio_iommu_type1_release(void *iommu_data)
 	kfree(iommu);
 }
 
-static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
+static int vfio_domains_have_iommu_snoop(struct vfio_iommu *iommu)
 {
 	struct vfio_domain *domain;
 	int ret = 1;
 
 	mutex_lock(&iommu->lock);
 	list_for_each_entry(domain, &iommu->domain_list, next) {
-		if (!(domain->prot & IOMMU_CACHE)) {
+		if (!(domain->prot & IOMMU_SNOOP)) {
 			ret = 0;
 			break;
 		}
@@ -2641,7 +2644,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
 	case VFIO_DMA_CC_IOMMU:
 		if (!iommu)
 			return 0;
-		return vfio_domains_have_iommu_cache(iommu);
+		return vfio_domains_have_iommu_snoop(iommu);
 	default:
 		return 0;
 	}
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index de0c57a..45184d7 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -21,6 +21,8 @@
 #define IOMMU_CACHE	(1 << 2) /* DMA cache coherency */
 #define IOMMU_NOEXEC	(1 << 3)
 #define IOMMU_MMIO	(1 << 4) /* e.g. things like MSI doorbells */
+#define IOMMU_SNOOP	(1 << 5) /* force DMA to snoop */
+
 /*
  * Where the bus hardware includes a privilege level as part of its access type
  * markings, and certain devices are capable of issuing transactions marked as
@@ -106,6 +108,8 @@ enum iommu_cap {
 					   transactions */
 	IOMMU_CAP_INTR_REMAP,		/* IOMMU supports interrupt isolation */
 	IOMMU_CAP_NOEXEC,		/* IOMMU_NOEXEC flag */
+	IOMMU_CAP_FORCE_SNOOP,		/* IOMMU forces all transactions to
+					   snoop cache */
 };
 
 /* These are the possible reserved region types */
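
For completeness, a sketch of how a caller would use the split flags after
this change (not compiled; the function and its parameters are made up,
only the iommu_* interfaces and the flags come from the patch above):

static int example_map_coherent(struct bus_type *bus,
				struct iommu_domain *domain,
				unsigned long iova, phys_addr_t paddr,
				size_t size)
{
	int prot = IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE;

	/*
	 * IOMMU_CACHE keeps its coherent-DMA meaning; IOMMU_SNOOP is
	 * ORed in only when the IOMMU can force no-snoop transactions
	 * to snoop (DMA_PTE_SNP on Intel).
	 */
	if (iommu_capable(bus, IOMMU_CAP_FORCE_SNOOP))
		prot |= IOMMU_SNOOP;

	return iommu_map(domain, iova, paddr, size, prot);
}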
Thanks
Kevin