This patch removes the implicit flushing that occurs in the implementations
of map and unmap operations and instead adds explicit flushing at the
end of the loops in the iommu_map/unmap() wrapper functions.
Because VT-d currently performs two different types of flush, depending upon
whether an existing PTE is being modified or a PTE is merely being added
(i.e. a previously non-present PTE is replaced), an 'iommu_flush_type'
enumeration is defined by this patch and the iommu_ops map method is
modified to pass back the type of flush necessary for the PTE that has been
populated. When a higher-order mapping operation is done, the wrapper code
performs the 'highest' level of flush required by the individual iommu_ops
method calls, where a 'modified PTE' flush is deemed to be higher than an
'added PTE' flush. The ARM SMMU implementation currently performs no
implicit flushing and therefore the modified map method always passes back
a flush type of 'none'.
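For illustration, the combining logic in the iommu_map() wrapper amounts to
the following (a condensed sketch of the hunk below, with the error path
elided; MAX() relies on the enum values being ordered none < added <
modified):

    enum iommu_flush_type flush_type = IOMMU_FLUSH_none;

    for ( i = 0; i < (1ul << page_order); i++ )
    {
        enum iommu_flush_type this_flush_type;

        rc = hd->platform_ops->map_page(d, dfn_add(dfn, i), mfn_add(mfn, i),
                                        flags, &this_flush_type);
        if ( rc )
            break;

        /* A 'modified PTE' flush supersedes an 'added PTE' flush */
        flush_type = MAX(flush_type, this_flush_type);
    }

    if ( hd->platform_ops->iotlb_flush && !this_cpu(iommu_dont_flush_iotlb) )
        rc = hd->platform_ops->iotlb_flush(d, dfn, 1ul << page_order,
                                           flush_type);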
NOTE: The per-cpu 'iommu_dont_flush_iotlb' flag is now respected by the
      iommu_map() wrapper function and therefore applies to all IOMMU
      implementations rather than just VT-d. Use of the flag has also been
      added to arch_iommu_hwdom_init() so that flushing is fully elided
      during the hardware domain's initial page table population.
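For example, the hardware domain setup now follows this pattern (abridged
from the arch_iommu_hwdom_init() hunk below):

    this_cpu(iommu_dont_flush_iotlb) = 1;

    for ( i = 0; i < top; i++ )
    {
        /* ... map each PFN; per-page IOTLB flushes are elided ... */
    }

    this_cpu(iommu_dont_flush_iotlb) = 0;

    /* A single flush suffices once the tables are fully populated */
    rc = iommu_iotlb_flush_all(d);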
Signed-off-by: Paul Durrant <[email protected]>
---
Cc: Stefano Stabellini <[email protected]>
Cc: Julien Grall <[email protected]>
Cc: Suravee Suthikulpanit <[email protected]>
Cc: Brian Woods <[email protected]>
Cc: Jan Beulich <[email protected]>
Cc: Kevin Tian <[email protected]>
Cc: Andrew Cooper <[email protected]>
Cc: Wei Liu <[email protected]>
Cc: "Roger Pau Monné" <[email protected]>
This code has only been compile-tested on ARM.
---
xen/drivers/passthrough/amd/iommu_map.c | 70 ++++++++++++++++-----------
xen/drivers/passthrough/arm/smmu.c | 13 +++--
xen/drivers/passthrough/iommu.c | 31 ++++++++++--
xen/drivers/passthrough/vtd/iommu.c | 26 +++++-----
xen/drivers/passthrough/x86/iommu.c | 15 ++++--
xen/include/asm-x86/hvm/svm/amd-iommu-proto.h | 8 +--
xen/include/xen/iommu.h | 13 ++++-
7 files changed, 121 insertions(+), 55 deletions(-)
diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c
index c05b042821..6afc412d3f 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -45,13 +45,14 @@ static void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long dfn)
unmap_domain_page(table);
}
-static bool set_iommu_pde_present(uint32_t *pde, unsigned long next_mfn,
- unsigned int next_level,
- bool iw, bool ir)
+static enum iommu_flush_type set_iommu_pde_present(
+ uint32_t *pde, unsigned long next_mfn, unsigned int next_level, bool iw,
+ bool ir)
{
uint64_t maddr_next;
uint32_t addr_lo, addr_hi, entry;
- bool need_flush = false, old_present;
+ bool old_present;
+ enum iommu_flush_type flush_type = IOMMU_FLUSH_none;
maddr_next = __pfn_to_paddr(next_mfn);
@@ -84,7 +85,7 @@ static bool set_iommu_pde_present(uint32_t *pde, unsigned long next_mfn,
if ( maddr_old != maddr_next || iw != old_w || ir != old_r ||
old_level != next_level )
- need_flush = true;
+ flush_type = IOMMU_FLUSH_modified;
}
addr_lo = maddr_next & DMA_32BIT_MASK;
@@ -121,24 +122,24 @@ static bool set_iommu_pde_present(uint32_t *pde, unsigned long next_mfn,
IOMMU_PDE_PRESENT_SHIFT, &entry);
pde[0] = entry;
- return need_flush;
+ return flush_type;
}
-static bool set_iommu_pte_present(unsigned long pt_mfn, unsigned long dfn,
- unsigned long next_mfn, int pde_level,
- bool iw, bool ir)
+static enum iommu_flush_type set_iommu_pte_present(
+ unsigned long pt_mfn, unsigned long dfn, unsigned long next_mfn,
+ int pde_level, bool iw, bool ir)
{
uint64_t *table;
uint32_t *pde;
- bool need_flush;
+ enum iommu_flush_type flush_type;
table = map_domain_page(_mfn(pt_mfn));
pde = (uint32_t *)(table + pfn_to_pde_idx(dfn, pde_level));
- need_flush = set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
+ flush_type = set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
unmap_domain_page(table);
- return need_flush;
+ return flush_type;
}
void amd_iommu_set_root_page_table(uint32_t *dte, uint64_t root_ptr,
@@ -522,13 +523,15 @@ static int update_paging_mode(struct domain *d, unsigned long dfn)
}
int amd_iommu_map_page(struct domain *d, dfn_t dfn, mfn_t mfn,
- unsigned int flags)
+ unsigned int flags,
+ enum iommu_flush_type *flush_type)
{
- bool need_flush;
struct domain_iommu *hd = dom_iommu(d);
int rc;
unsigned long pt_mfn[7];
+ *flush_type = IOMMU_FLUSH_none;
+
if ( iommu_use_hap_pt(d) )
return 0;
@@ -570,12 +573,9 @@ int amd_iommu_map_page(struct domain *d, dfn_t dfn, mfn_t mfn,
}
/* Install 4k mapping */
- need_flush = set_iommu_pte_present(pt_mfn[1], dfn_x(dfn), mfn_x(mfn), 1,
- !!(flags & IOMMUF_writable),
- !!(flags & IOMMUF_readable));
-
- if ( need_flush )
- amd_iommu_flush_pages(d, dfn_x(dfn), 0);
+ *flush_type = set_iommu_pte_present(pt_mfn[1], dfn_x(dfn), mfn_x(mfn),
+ 1, !!(flags & IOMMUF_writable),
+ !!(flags & IOMMUF_readable));
spin_unlock(&hd->arch.mapping_lock);
return 0;
@@ -629,8 +629,6 @@ int amd_iommu_unmap_page(struct domain *d, dfn_t dfn)
clear_iommu_pte_present(pt_mfn[1], dfn_x(dfn));
spin_unlock(&hd->arch.mapping_lock);
-
- amd_iommu_flush_pages(d, dfn_x(dfn), 0);
return 0;
}
@@ -646,8 +644,14 @@ static unsigned long flush_count(dfn_t dfn, unsigned int page_count,
}
int amd_iommu_flush_iotlb_pages(struct domain *d, dfn_t dfn,
- unsigned int page_count)
+ unsigned int page_count,
+ enum iommu_flush_type flush_type)
{
+ if ( flush_type == IOMMU_FLUSH_none )
+ return 0;
+
+ ASSERT(flush_type == IOMMU_FLUSH_modified);
+
/* Match VT-d semantics */
if ( !page_count || dfn_eq(dfn, INVALID_DFN) ||
dfn_lt(dfn_add(dfn, page_count), dfn) /* overflow */ )
@@ -687,10 +691,11 @@ int amd_iommu_reserve_domain_unity_map(struct domain *domain,
paddr_t phys_addr,
unsigned long size, int iw, int ir)
{
+ enum iommu_flush_type flush_type = IOMMU_FLUSH_none;
unsigned long npages, i;
unsigned long gfn;
unsigned int flags = !!ir;
- int rt = 0;
+ int ignored, rt = 0;
if ( iw )
flags |= IOMMUF_writable;
@@ -700,12 +705,23 @@ int amd_iommu_reserve_domain_unity_map(struct domain *domain,
for ( i = 0; i < npages; i++ )
{
unsigned long frame = gfn + i;
+ enum iommu_flush_type this_flush_type;
- rt = amd_iommu_map_page(domain, _dfn(frame), _mfn(frame), flags);
+ rt = amd_iommu_map_page(domain, _dfn(frame), _mfn(frame), flags,
+ &this_flush_type);
if ( rt != 0 )
- return rt;
+ break;
+
+ flush_type = MAX(flush_type, this_flush_type);
}
- return 0;
+
+ /*
+ * The underlying flush implementation is void, so the return value is
+ * meaningless and can hence be ignored.
+ */
+ ignored = amd_iommu_flush_iotlb_pages(domain, _dfn(gfn), npages,
+ flush_type);
+ return rt;
}
/* Share p2m table with iommu. */
diff --git a/xen/drivers/passthrough/arm/smmu.c b/xen/drivers/passthrough/arm/smmu.c
index 9612c0fddc..25b95d0197 100644
--- a/xen/drivers/passthrough/arm/smmu.c
+++ b/xen/drivers/passthrough/arm/smmu.c
@@ -2534,8 +2534,9 @@ static int __must_check arm_smmu_iotlb_flush_all(struct domain *d)
return 0;
}
-static int __must_check arm_smmu_iotlb_flush(struct domain *d, dfn_t dfn,
- unsigned int page_count)
+static int __must_check arm_smmu_iotlb_flush(
+ struct domain *d, dfn_t dfn, unsigned int page_count,
+ enum iommu_flush_type flush_type)
{
/* ARM SMMU v1 doesn't have flush by VMA and VMID */
return arm_smmu_iotlb_flush_all(d);
@@ -2731,8 +2732,9 @@ static void arm_smmu_iommu_domain_teardown(struct domain *d)
xfree(xen_domain);
}
-static int __must_check arm_smmu_map_page(struct domain *d, dfn_t dfn,
- mfn_t mfn, unsigned int flags)
+static int __must_check arm_smmu_map_page(
+ struct domain *d, dfn_t dfn, mfn_t mfn, unsigned int flags,
+ enum iommu_flush_type *flush_type)
{
p2m_type_t t;
@@ -2753,6 +2755,9 @@ static int __must_check arm_smmu_map_page(struct domain *d, dfn_t dfn,
t = (flags & IOMMUF_writable) ? p2m_iommu_map_rw : p2m_iommu_map_ro;
+ /* Flushing is always done explicitly by the P2M code */
+ *flush_type = IOMMU_FLUSH_none;
+
/*
* The function guest_physmap_add_entry replaces the current mapping
* if there is already one...
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index ac62d7f52a..bfd9874034 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -219,6 +219,7 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
unsigned long mfn = mfn_x(page_to_mfn(page));
unsigned long dfn = mfn_to_gmfn(d, mfn);
unsigned int mapping = IOMMUF_readable;
+ enum iommu_flush_type flush_type;
int ret;
if ( ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
@@ -227,7 +228,7 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
mapping |= IOMMUF_writable;
ret = hd->platform_ops->map_page(d, _dfn(dfn), _mfn(mfn),
- mapping);
+ mapping, &flush_type);
if ( !rc )
rc = ret;
@@ -235,6 +236,9 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
process_pending_softirqs();
}
+ if ( !rc && hd->platform_ops->iotlb_flush_all )
+ rc = hd->platform_ops->iotlb_flush_all(d);
+
if ( rc )
printk(XENLOG_WARNING "d%d: IOMMU mapping failed: %d\n",
d->domain_id, rc);
@@ -308,6 +312,7 @@ int iommu_map(struct domain *d, dfn_t dfn, mfn_t mfn,
unsigned int page_order, unsigned int flags)
{
const struct domain_iommu *hd = dom_iommu(d);
+ enum iommu_flush_type flush_type = IOMMU_FLUSH_none;
unsigned long i;
int rc = 0;
@@ -319,11 +324,18 @@ int iommu_map(struct domain *d, dfn_t dfn, mfn_t mfn,
for ( i = 0; i < (1ul << page_order); i++ )
{
+ enum iommu_flush_type this_flush_type;
+ int ignore;
+
rc = hd->platform_ops->map_page(d, dfn_add(dfn, i),
- mfn_add(mfn, i), flags);
+ mfn_add(mfn, i), flags,
+ &this_flush_type);
if ( likely(!rc) )
+ {
+ flush_type = MAX(flush_type, this_flush_type);
continue;
+ }
if ( !d->is_shutting_down && printk_ratelimit() )
printk(XENLOG_ERR
@@ -336,12 +348,19 @@ int iommu_map(struct domain *d, dfn_t dfn, mfn_t mfn,
if ( hd->platform_ops->unmap_page(d, dfn_add(dfn, i)) )
continue;
+ /* Something went wrong so attempt a full flush */
+ ignore = hd->platform_ops->iotlb_flush_all(d);
+
if ( !is_hardware_domain(d) )
domain_crash(d);
break;
}
+ if ( hd->platform_ops->iotlb_flush && !this_cpu(iommu_dont_flush_iotlb) )
+ rc = hd->platform_ops->iotlb_flush(d, dfn, (1ul << page_order),
+ flush_type);
+
return rc;
}
@@ -378,6 +397,10 @@ int iommu_unmap(struct domain *d, dfn_t dfn, unsigned int page_order)
}
}
+ if ( hd->platform_ops->iotlb_flush )
+ rc = hd->platform_ops->iotlb_flush(d, dfn, (1ul << page_order),
+ IOMMU_FLUSH_modified);
+
return rc;
}
@@ -417,7 +440,9 @@ int iommu_iotlb_flush(struct domain *d, dfn_t dfn, unsigned int page_count)
if ( !iommu_enabled || !hd->platform_ops || !hd->platform_ops->iotlb_flush )
return 0;
- rc = hd->platform_ops->iotlb_flush(d, dfn, page_count);
+ /* Assume a 'modified' flush is required */
+ rc = hd->platform_ops->iotlb_flush(d, dfn, page_count,
+ IOMMU_FLUSH_modified);
if ( unlikely(rc) )
{
if ( !d->is_shutting_down && printk_ratelimit() )
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 1601278b07..d547ba3d23 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -631,11 +631,14 @@ static int __must_check iommu_flush_iotlb(struct domain *d, dfn_t dfn,
return rc;
}
-static int __must_check iommu_flush_iotlb_pages(struct domain *d,
- dfn_t dfn,
- unsigned int page_count)
+static int __must_check iommu_flush_iotlb_pages(
+ struct domain *d, dfn_t dfn, unsigned int page_count,
+ enum iommu_flush_type flush_type)
{
- return iommu_flush_iotlb(d, dfn, 1, page_count);
+ return (flush_type == IOMMU_FLUSH_none) ?
+ 0 :
+ iommu_flush_iotlb(d, dfn, (flush_type == IOMMU_FLUSH_modified),
+ page_count);
}
static int __must_check iommu_flush_iotlb_all(struct domain *d)
@@ -674,9 +677,6 @@ static int __must_check dma_pte_clear_one(struct domain *domain, u64 addr)
spin_unlock(&hd->arch.mapping_lock);
iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
- if ( !this_cpu(iommu_dont_flush_iotlb) )
- rc = iommu_flush_iotlb_pages(domain, daddr_to_dfn(addr), 1);
-
unmap_vtd_domain_page(page);
return rc;
@@ -1771,15 +1771,17 @@ static void iommu_domain_teardown(struct domain *d)
spin_unlock(&hd->arch.mapping_lock);
}
-static int __must_check intel_iommu_map_page(struct domain *d,
- dfn_t dfn, mfn_t mfn,
- unsigned int flags)
+static int __must_check intel_iommu_map_page(
+ struct domain *d, dfn_t dfn, mfn_t mfn, unsigned int flags,
+ enum iommu_flush_type *flush_type)
{
struct domain_iommu *hd = dom_iommu(d);
struct dma_pte *page, *pte, old, new = {};
u64 pg_maddr;
int rc = 0;
+ *flush_type = IOMMU_FLUSH_none;
+
/* Do nothing if VT-d shares EPT page table */
if ( iommu_use_hap_pt(d) )
return 0;
@@ -1823,8 +1825,8 @@ static int __must_check intel_iommu_map_page(struct domain *d,
spin_unlock(&hd->arch.mapping_lock);
unmap_vtd_domain_page(page);
- if ( !this_cpu(iommu_dont_flush_iotlb) )
- rc = iommu_flush_iotlb(d, dfn, dma_pte_present(old), 1);
+ *flush_type = dma_pte_present(old) ?
+ IOMMU_FLUSH_modified : IOMMU_FLUSH_added;
return rc;
}
diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c
index c68a72279d..04b6330336 100644
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -50,7 +50,6 @@ int arch_iommu_populate_page_table(struct domain *d)
struct page_info *page;
int rc = 0, n = 0;
- this_cpu(iommu_dont_flush_iotlb) = 1;
spin_lock(&d->page_alloc_lock);
if ( unlikely(d->is_dying) )
@@ -63,6 +62,7 @@ int arch_iommu_populate_page_table(struct domain *d)
{
unsigned long mfn = mfn_x(page_to_mfn(page));
unsigned long gfn = mfn_to_gmfn(d, mfn);
+ enum iommu_flush_type flush_type;
if ( gfn != gfn_x(INVALID_GFN) )
{
@@ -70,7 +70,8 @@ int arch_iommu_populate_page_table(struct domain *d)
BUG_ON(SHARED_M2P(gfn));
rc = hd->platform_ops->map_page(d, _dfn(gfn), _mfn(mfn),
IOMMUF_readable |
- IOMMUF_writable);
+ IOMMUF_writable,
+ &flush_type);
}
if ( rc )
{
@@ -104,7 +105,6 @@ int arch_iommu_populate_page_table(struct domain *d)
}
spin_unlock(&d->page_alloc_lock);
- this_cpu(iommu_dont_flush_iotlb) = 0;
if ( !rc )
rc = iommu_iotlb_flush_all(d);
@@ -207,6 +207,7 @@ static bool __hwdom_init hwdom_iommu_map(const struct domain *d,
void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
{
unsigned long i, top, max_pfn;
+ int rc;
BUG_ON(!is_hardware_domain(d));
@@ -230,10 +231,10 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
max_pfn = (GB(4) >> PAGE_SHIFT) - 1;
top = max(max_pdx, pfn_to_pdx(max_pfn) + 1);
+ this_cpu(iommu_dont_flush_iotlb) = 1;
for ( i = 0; i < top; i++ )
{
unsigned long pfn = pdx_to_pfn(i);
- int rc;
if ( !hwdom_iommu_map(d, pfn, max_pfn) )
continue;
@@ -250,6 +251,12 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
if (!(i & 0xfffff))
process_pending_softirqs();
}
+ this_cpu(iommu_dont_flush_iotlb) = 0;
+
+ rc = iommu_iotlb_flush_all(d);
+ if ( rc )
+ printk(XENLOG_WARNING "d%d: IOMMU flush failed: %d\n",
+ d->domain_id, rc);
}
/*
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
index 88715329ca..5d86cf9850 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -53,15 +53,17 @@ int amd_iommu_update_ivrs_mapping_acpi(void);
/* mapping functions */
int __must_check amd_iommu_map_page(struct domain *d, dfn_t dfn,
- mfn_t mfn, unsigned int flags);
+ mfn_t mfn, unsigned int flags,
+ enum iommu_flush_type *flush_type);
int __must_check amd_iommu_unmap_page(struct domain *d, dfn_t dfn);
uint64_t amd_iommu_get_address_from_pte(void *entry);
int __must_check amd_iommu_alloc_root(struct domain_iommu *hd);
int amd_iommu_reserve_domain_unity_map(struct domain *domain,
paddr_t phys_addr, unsigned long size,
int iw, int ir);
-int __must_check amd_iommu_flush_iotlb_pages(struct domain *d, dfn_t dfn,
- unsigned int page_count);
+int __must_check amd_iommu_flush_iotlb_pages(
+ struct domain *d, dfn_t dfn, unsigned int page_count,
+ enum iommu_flush_type flush_type);
int __must_check amd_iommu_flush_iotlb_all(struct domain *d);
/* Share p2m table with iommu */
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index da8294bac8..289e0e2772 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -155,6 +155,13 @@ struct page_info;
*/
typedef int iommu_grdm_t(xen_pfn_t start, xen_ulong_t nr, u32 id, void *ctxt);
+enum iommu_flush_type
+{
+ IOMMU_FLUSH_none, /* no flush required */
+ IOMMU_FLUSH_added, /* no modified entries, just additional entries */
+ IOMMU_FLUSH_modified, /* modified entries */
+};
+
struct iommu_ops {
int (*init)(struct domain *d);
void (*hwdom_init)(struct domain *d);
@@ -177,7 +184,8 @@ struct iommu_ops {
* other by the caller in order to have meaningful results.
*/
int __must_check (*map_page)(struct domain *d, dfn_t dfn, mfn_t mfn,
- unsigned int flags);
+ unsigned int flags,
+ enum iommu_flush_type *flush_type);
int __must_check (*unmap_page)(struct domain *d, dfn_t dfn);
int __must_check (*lookup_page)(struct domain *d, dfn_t dfn, mfn_t *mfn,
unsigned int *flags);
@@ -193,7 +201,8 @@ struct iommu_ops {
void (*share_p2m)(struct domain *d);
void (*crash_shutdown)(void);
int __must_check (*iotlb_flush)(struct domain *d, dfn_t dfn,
- unsigned int page_count);
+ unsigned int page_count,
+ enum iommu_flush_type flush_type);
int __must_check (*iotlb_flush_all)(struct domain *d);
int (*get_reserved_device_memory)(iommu_grdm_t *, void *);
void (*dump_p2m_table)(struct domain *d);
--
2.11.0