On 6/20/25 9:18 AM, Zhenzhong Duan wrote:
> From: Yi Liu <yi.l....@intel.com>
>
> This replays guest pasid attachments after context cache invalidation.
> This is a behavior to ensure safety. Actually, programmer should issue
> pasid cache invalidation with proper granularity after issuing a context
> cache invalidation.
So is it mandated to do the invalidation twice?
>
> Signed-off-by: Yi Liu <yi.l....@intel.com>
> Signed-off-by: Yi Sun <yi.y....@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
> ---
> hw/i386/intel_iommu_internal.h | 1 +
> hw/i386/intel_iommu.c | 51 ++++++++++++++++++++++++++++++++--
> hw/i386/trace-events | 1 +
> 3 files changed, 51 insertions(+), 2 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 92a533db54..b3e4aa23f1 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -575,6 +575,7 @@ typedef enum VTDPCInvType {
> VTD_PASID_CACHE_FORCE_RESET = 0,
> /* pasid cache invalidation rely on guest PASID entry */
> VTD_PASID_CACHE_GLOBAL_INV, /* pasid cache global invalidation */
> + VTD_PASID_CACHE_DEVSI, /* pasid cache device selective invalidation
> */
> VTD_PASID_CACHE_DOMSI, /* pasid cache domain selective invalidation
> */
> VTD_PASID_CACHE_PASIDSI, /* pasid cache pasid selective invalidation
> */
> } VTDPCInvType;
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 1c94a0033c..621b07aa02 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -92,6 +92,10 @@ static void vtd_address_space_refresh_all(IntelIOMMUState
> *s);
> static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
>
> static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s);
> +static void vtd_pasid_cache_sync(IntelIOMMUState *s,
> + VTDPASIDCacheInfo *pc_info);
> +static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
> + PCIBus *bus, uint16_t devfn);
>
> static void vtd_panic_require_caching_mode(void)
> {
> @@ -2437,6 +2441,8 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
>
> static void vtd_context_global_invalidate(IntelIOMMUState *s)
> {
> + VTDPASIDCacheInfo pc_info = { .error_happened = false, };
> +
> trace_vtd_inv_desc_cc_global();
> /* Protects context cache */
> vtd_iommu_lock(s);
> @@ -2454,6 +2460,9 @@ static void
> vtd_context_global_invalidate(IntelIOMMUState *s)
> * VT-d emulation codes.
> */
> vtd_iommu_replay_all(s);
> +
> + pc_info.type = VTD_PASID_CACHE_GLOBAL_INV;
> + vtd_pasid_cache_sync(s, &pc_info);
> }
>
> #ifdef CONFIG_IOMMUFD
> @@ -2696,6 +2705,21 @@ static void
> vtd_context_device_invalidate(IntelIOMMUState *s,
> * happened.
> */
> vtd_address_space_sync(vtd_as);
> + /*
> + * Per spec, context flush should also followed with PASID
be followed
> + * cache and iotlb flush. Regards to a device selective
"Regarding" (or "With regard to")?
> + * context cache invalidation:
> + * if (emaulted_device)
emulated
> + * invalidate pasid cache and pasid-based iotlb
> + * else if (assigned_device)
> + * check if the device has been bound to any pasid
> + * invoke pasid_unbind regards to each bound pasid
> + * Here, we have vtd_pasid_cache_devsi() to invalidate pasid
> + * caches, while for piotlb in QEMU, we don't have it yet, so
> + * no handling. For assigned device, host iommu driver would
> + * flush piotlb when a pasid unbind is pass down to it.
> + */
> + vtd_pasid_cache_devsi(s, vtd_as->bus, devfn);
> }
> }
> }
> @@ -3447,6 +3471,11 @@ static gboolean vtd_flush_pasid(gpointer key, gpointer
> value,
> /* Fall through */
> case VTD_PASID_CACHE_GLOBAL_INV:
> break;
> + case VTD_PASID_CACHE_DEVSI:
> + if (pc_info->bus != vtd_as->bus || pc_info->devfn != vtd_as->devfn) {
> + return false;
> + }
> + break;
> default:
> error_report("invalid pc_info->type");
> abort();
> @@ -3640,6 +3669,11 @@ static void
> vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
> case VTD_PASID_CACHE_GLOBAL_INV:
> /* loop all assigned devices */
> break;
> + case VTD_PASID_CACHE_DEVSI:
> + walk_info.bus = pc_info->bus;
> + walk_info.devfn = pc_info->devfn;
> + vtd_replay_pasid_bind_for_dev(s, start, end, &walk_info);
> + return;
> case VTD_PASID_CACHE_FORCE_RESET:
> /* For force reset, no need to go further replay */
> return;
> @@ -3675,8 +3709,7 @@ static void
> vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
> * It includes updating the pasid cache in vIOMMU and updating the
> * pasid bindings per guest's latest pasid entry presence.
> */
> -static void vtd_pasid_cache_sync(IntelIOMMUState *s,
> - VTDPASIDCacheInfo *pc_info)
> +static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo
> *pc_info)
> {
> if (!s->flts || !s->root_scalable || !s->dmar_enabled) {
> return;
> @@ -3737,6 +3770,20 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s,
> vtd_replay_guest_pasid_bindings(s, pc_info);
> }
>
> +static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
> + PCIBus *bus, uint16_t devfn)
> +{
> + VTDPASIDCacheInfo pc_info = { .error_happened = false, };
> +
> + trace_vtd_pasid_cache_devsi(devfn);
> +
> + pc_info.type = VTD_PASID_CACHE_DEVSI;
> + pc_info.bus = bus;
> + pc_info.devfn = devfn;
> +
> + vtd_pasid_cache_sync(s, &pc_info);
> +}
> +
> static bool vtd_process_pasid_desc(IntelIOMMUState *s,
> VTDInvDesc *inv_desc)
> {
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index 1c31b9a873..830b11f68b 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -28,6 +28,7 @@ vtd_pasid_cache_reset(void) ""
> vtd_pasid_cache_gsi(void) ""
> vtd_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation
> domain 0x%"PRIx16
> vtd_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC
> invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
> +vtd_pasid_cache_devsi(uint16_t devfn) "Dev selective PC invalidation dev:
> 0x%"PRIx16
> vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
> vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8"
> devfn %"PRIu8" not present"
> vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t
> domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64"
> domain 0x%"PRIx16
Eric