Hi Eric, >-----Original Message----- >From: Eric Auger <eric.au...@redhat.com> >Subject: Re: [PATCH v2 10/19] intel_iommu: Handle PASID entry removing and >updating > >Hi Zhenzhong, > >On 6/20/25 9:18 AM, Zhenzhong Duan wrote: >I would suggest: Handle PASID entry removal and update instead of verbing. >> This adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the >> pasid entry and track PASID usage and future PASID tagged DMA address >> translation support in vIOMMU. >> >> VTDAddressSpace of PCI_NO_PASID is allocated when device is plugged and >> never freed. For other pasid, VTDAddressSpace instance is created/destroyed >> per the guest pasid entry set up/destroy for passthrough devices. While for >> emulated devices, VTDAddressSpace instance is created in the PASID tagged >DMA >> translation and be destroyed per guest PASID cache invalidation. This focuses >s/be detroyed/destroyed >> on the PASID cache management for passthrough devices as there is no PASID >> capable emulated devices yet. >if you don't handle the emulated device case, may be omit talking about >them here.
OK, will drop the sentence starting from "While for emulated devices..." >> >> When guest modifies a PASID entry, QEMU will capture the guest pasid >selective >> pasid cache invalidation, allocate or remove a VTDAddressSpace instance per >the >> invalidation reasons: >> >> a) a present pasid entry moved to non-present >> b) a present pasid entry to be a present entry >> c) a non-present pasid entry moved to present >> >> This handles a) and b), following patch will handle c). >This -> This patch Will do. >> >> vIOMMU emulator could figure out the reason by fetching latest guest pasid >entry >> and compare it with the PASID cache. >> >> Signed-off-by: Yi Liu <yi.l....@intel.com> >> Signed-off-by: Yi Sun <yi.y....@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> >> --- >> hw/i386/intel_iommu_internal.h | 27 ++++ >> include/hw/i386/intel_iommu.h | 6 + >> hw/i386/intel_iommu.c | 265 +++++++++++++++++++++++++++++++-- >> hw/i386/trace-events | 3 + >> 4 files changed, 291 insertions(+), 10 deletions(-) >> >> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h >> index 18bc22fc72..01c881ed4d 100644 >> --- a/hw/i386/intel_iommu_internal.h >> +++ b/hw/i386/intel_iommu_internal.h >> @@ -315,6 +315,7 @@ typedef enum VTDFaultReason { >> * request while disabled */ >> VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */ >> >> + VTD_FR_RTADDR_INV_TTM = 0x31, /* Invalid TTM in RTADDR */ >> /* PASID directory entry access failure */ >> VTD_FR_PASID_DIR_ACCESS_ERR = 0x50, >> /* The Present(P) field of pasid directory entry is 0 */ >> @@ -492,6 +493,15 @@ typedef union VTDInvDesc VTDInvDesc; >> #define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL >> #define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL >> >Adding some basic comments for the uncultured reader like me would help >doing the link with the vtd spec: >PASID-cache Invalidate Descriptor (pc_inv_dsc) fields Will do. >> +#define VTD_INV_DESC_PASIDC_G (3ULL << 4) >> +#define VTD_INV_DESC_PASIDC_PASID(val) (((val) >> 32) & 0xfffffULL) >> +#define VTD_INV_DESC_PASIDC_DID(val) (((val) >> 16) & >VTD_DOMAIN_ID_MASK) >> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0 0xfff000000000f1c0ULL >> + >> +#define VTD_INV_DESC_PASIDC_DSI (0ULL << 4) >> +#define VTD_INV_DESC_PASIDC_PASID_SI (1ULL << 4) >> +#define VTD_INV_DESC_PASIDC_GLOBAL (3ULL << 4) >as those are values for the granularity field using > >VTD_INV_DESC_PASIDC_G_* look relevant to me. Will do. > > >I think you would gain in readability if you adopt extract32/64 syntax >like in hw/arm/smmuv3-internal.h >Looks more readable to me. This is different from existing style in intel_iommu_internal.h But I do agree with your suggestions, let me try it with this series. >> + >> /* Information about page-selective IOTLB invalidate */ >> struct VTDIOTLBPageInvInfo { >> uint16_t domain_id; >> @@ -552,6 +562,22 @@ typedef struct VTDRootEntry VTDRootEntry; >> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | >~VTD_HAW_MASK(aw)) >> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL >> >> +typedef enum VTDPCInvType { >> + /* pasid cache invalidation rely on guest PASID entry */ >> + VTD_PASID_CACHE_GLOBAL_INV, /* pasid cache global invalidation */ >> + VTD_PASID_CACHE_DOMSI, /* pasid cache domain selective invalidation >*/ >> + VTD_PASID_CACHE_PASIDSI, /* pasid cache pasid selective invalidation >> */ >> +} VTDPCInvType; >> + >> +typedef struct VTDPASIDCacheInfo { >> + VTDPCInvType type; >> + uint16_t domain_id; >did? OK >> + uint32_t pasid; >> + PCIBus *bus; >> + uint16_t devfn; >> + bool error_happened; >wouldn't it make more sense to store an Error * directly so that you can >fill it whenever there is an error occuring. >Something like VFIOContainerBase::error Will do >> +} VTDPASIDCacheInfo; >> + >> /* PASID Table Related Definitions */ >> #define VTD_PASID_DIR_BASE_ADDR_MASK (~0xfffULL) >> #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL) >> @@ -563,6 +589,7 @@ typedef struct VTDRootEntry VTDRootEntry; >> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL) >> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & >VTD_PASID_TABLE_BITS_MASK) >> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing >> Disable >*/ >> +#define VTD_PASID_TBL_ENTRY_NUM (1ULL << 6) >> >> /* PASID Granular Translation Type Mask */ >> #define VTD_PASID_ENTRY_P 1ULL >> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h >> index 50f9b27a45..fbc9da903a 100644 >> --- a/include/hw/i386/intel_iommu.h >> +++ b/include/hw/i386/intel_iommu.h >> @@ -95,6 +95,11 @@ struct VTDPASIDEntry { >> uint64_t val[8]; >> }; >> >> +typedef struct VTDPASIDCacheEntry { >> + struct VTDPASIDEntry pasid_entry; >> + bool cache_filled; >> +} VTDPASIDCacheEntry; >> + >> struct VTDAddressSpace { >> PCIBus *bus; >> uint8_t devfn; >> @@ -107,6 +112,7 @@ struct VTDAddressSpace { >> MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */ >> IntelIOMMUState *iommu_state; >> VTDContextCacheEntry context_cache_entry; >> + VTDPASIDCacheEntry pasid_cache_entry; >> QLIST_ENTRY(VTDAddressSpace) next; >> /* Superset of notifier flags that this address space has */ >> IOMMUNotifierFlag notifier_flags; >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index 8948b8370f..1db581d14a 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -826,11 +826,24 @@ static inline bool >vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe) >> } >> } >> >> +static inline uint16_t vtd_pe_get_did(VTDPASIDEntry *pe) >> +{ >> + return VTD_SM_PASID_ENTRY_DID((pe)->val[1]); >see my previous comment about extract* >> +} >> + >> static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire) >> { >> return pdire->val & 1; >> } >> >> +static inline void pasid_cache_info_set_error(VTDPASIDCacheInfo *pc_info) >not sure whether this helper is really needed >> +{ >> + if (pc_info->error_happened) { >Besides why not simply overwriting unconditionnally? I will follow VFIO code to save first Error and drop the following ones. >> + return; >> + } >> + pc_info->error_happened = true; >> +} >> + >> /** >> * Caller of this function should check present bit if wants >> * to use pdir entry for further usage except for fpd bit check. >> @@ -3103,6 +3116,241 @@ static bool >vtd_process_piotlb_desc(IntelIOMMUState *s, >> return true; >> } >> >> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as, >> + uint32_t pasid, VTDPASIDEntry >> *pe) >> +{ >> + IntelIOMMUState *s = vtd_as->iommu_state; >> + VTDContextEntry ce; >> + int ret; >> + >> + if (!s->root_scalable) { >> + return -VTD_FR_RTADDR_INV_TTM; >> + } >> + >> + ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as- >>devfn, >> + &ce); >> + if (ret) { >> + return ret; >> + } >> + >> + return vtd_ce_get_pasid_entry(s, &ce, pe, pasid); >> +} >> + >> +static bool vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2) >> +{ >> + return !memcmp(p1, p2, sizeof(*p1)); >> +} >> + >> +/* >> + * This function fills in the pasid entry in &vtd_as. Caller >> + * of this function should hold iommu_lock. >> + */ >> +static int vtd_fill_pe_in_cache(IntelIOMMUState *s, VTDAddressSpace *vtd_as, >> + VTDPASIDEntry *pe) >> +{ >> + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry; >> + >> + if (vtd_pasid_entry_compare(pe, &pc_entry->pasid_entry)) { >> + /* No need to go further as cached pasid entry is latest */ >> + return 0; >> + } >> + >> + pc_entry->pasid_entry = *pe; >> + pc_entry->cache_filled = true; >> + >> + /* >> + * TODO: send pasid bind to host for passthru devices >> + */ >> + >> + return 0; >> +} >> + >> +/* >> + * This function is used to update or clear cached pasid entry in vtd_as >> + * instances. Caller of this function should hold iommu_lock. >> + */ >> +static gboolean vtd_flush_pasid(gpointer key, gpointer value, >> + gpointer user_data) >> +{ >> + VTDPASIDCacheInfo *pc_info = user_data; >> + VTDAddressSpace *vtd_as = value; >> + IntelIOMMUState *s = vtd_as->iommu_state; >> + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry; >> + VTDPASIDEntry pe; >> + uint16_t did; >> + uint32_t pasid; >> + int ret; >> + >> + if (!pc_entry->cache_filled) { >> + return false; >> + } >> + did = vtd_pe_get_did(&pc_entry->pasid_entry); >> + >> + if (vtd_as_to_iommu_pasid_locked(vtd_as, &pasid)) { >> + goto remove; >> + } >> + >> + switch (pc_info->type) { >> + case VTD_PASID_CACHE_PASIDSI: >> + if (pc_info->pasid != pasid) { >> + return false; >> + } >> + /* Fall through */ >> + case VTD_PASID_CACHE_DOMSI: >> + if (pc_info->domain_id != did) { >> + return false; >> + } >> + /* Fall through */ >> + case VTD_PASID_CACHE_GLOBAL_INV: >> + break; >> + default: >> + error_report("invalid pc_info->type"); >> + abort(); >> + } >> + >> + /* >> + * pasid cache invalidation may indicate a present pasid >> + * entry to present pasid entry modification. To cover such >> + * case, vIOMMU emulator needs to fetch latest guest pasid >> + * entry and check cached pasid entry, then update pasid >> + * cache and send pasid bind/unbind to host properly. >if you don't do that in that patch I would put that in a subsequent >patch. Focus on the PASID cache in this patch. See my subsequent comment Sure. >> + */ >> + ret = vtd_dev_get_pe_from_pasid(vtd_as, pasid, &pe); >> + if (ret) { >> + /* >> + * No valid pasid entry in guest memory. e.g. pasid entry >> + * was modified to be either all-zero or non-present. Either >> + * case means existing pasid cache should be removed. >> + */ >> + goto remove; >> + } >> + >> + if (vtd_fill_pe_in_cache(s, vtd_as, &pe)) { >> + pasid_cache_info_set_error(pc_info); >> + } >> + return false; >> + >> +remove: >> + /* >> + * TODO: send pasid unbind to host for passthru devices >> + */ >> + pc_entry->cache_filled = false; >> + >> + /* >> + * Don't remove address space of PCI_NO_PASID which is created by PCI >> + * sub-system. >> + */ >> + if (vtd_as->pasid == PCI_NO_PASID) { >> + return false; >> + } >> + return true; >> +} >> + >> +/* >> + * This function syncs the pasid bindings between guest and host. >> + * It includes updating the pasid cache in vIOMMU and updating the >> + * pasid bindings per guest's latest pasid entry presence. >> + */ >> +static void vtd_pasid_cache_sync(IntelIOMMUState *s, >> + VTDPASIDCacheInfo *pc_info) >> +{ >> + if (!s->flts || !s->root_scalable || !s->dmar_enabled) { >you don't update error_happened. Is that OK? This is the case when no pasid cache used but guest sends pasid cache invalidation request, there is no error, so just ignore this request. >> + return; >> + } >> + >> + /* >> + * Regards to a pasid cache invalidation, e.g. a PSI. >> + * it could be either cases of below: >> + * a) a present pasid entry moved to non-present >> + * b) a present pasid entry to be a present entry >> + * c) a non-present pasid entry moved to present >> + * >> + * Different invalidation granularity may affect different device >> + * scope and pasid scope. But for each invalidation granularity, >> + * it needs to do two steps to sync host and guest pasid binding. >> + * >> + * Here is the handling of a PSI: >> + * 1) loop all the existing vtd_as instances to update them >> + * according to the latest guest pasid entry in pasid table. >> + * this will make sure affected existing vtd_as instances >> + * cached the latest pasid entries. Also, during the loop, the >> + * host should be notified if needed. e.g. pasid unbind or pasid >> + * update. Should be able to cover case a) and case b). >> + * >> + * 2) loop all devices to cover case c) >> + * - For devices which are backed by HostIOMMUDeviceIOMMUFD >instances, >> + * we loop them and check if guest pasid entry exists. If yes, >> + * it is case c), we update the pasid cache and also notify >> + * host. >> + * - For devices which are not backed by HostIOMMUDeviceIOMMUFD, >> + * it is not necessary to create pasid cache at this phase since >> + * it could be created when vIOMMU does DMA address translation. >> + * This is not yet implemented since there is no emulated >> + * pasid-capable devices today. If we have such devices in >> + * future, the pasid cache shall be created there. >> + * Other granularity follow the same steps, just with different scope >I would put all the stuff related to interactions with host in a >subsequent patch. This patch could concentrate on the IOMMU PASID cache >only. And then you would add the extra complexity of syncs with the >host. I think it would simplify the review. Sure. >> + * >> + */ >> + >> + vtd_iommu_lock(s); >> + /* >> + * Step 1: loop all the existing vtd_as instances for pasid unbind and >> + * update. >> + */ >> + g_hash_table_foreach_remove(s->vtd_address_spaces, vtd_flush_pasid, >> + pc_info); >> + vtd_iommu_unlock(s); >> + >> + /* TODO: Step 2: loop all the existing vtd_hiod instances for pasid >> bind. */ >> +} >> + >> +static bool vtd_process_pasid_desc(IntelIOMMUState *s, >> + VTDInvDesc *inv_desc) >> +{ >> + uint16_t domain_id; >> + uint32_t pasid; >> + VTDPASIDCacheInfo pc_info = {}; >> + uint64_t mask[4] = {VTD_INV_DESC_PASIDC_RSVD_VAL0, >VTD_INV_DESC_ALL_ONE, >> + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; >> + >> + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true, >> + __func__, "pasid cache inv")) { >> + return false; >> + } >> + >> + domain_id = VTD_INV_DESC_PASIDC_DID(inv_desc->val[0]); >> + pasid = VTD_INV_DESC_PASIDC_PASID(inv_desc->val[0]); >> + >> + switch (inv_desc->val[0] & VTD_INV_DESC_PASIDC_G) { >> + case VTD_INV_DESC_PASIDC_DSI: >> + trace_vtd_pasid_cache_dsi(domain_id); >> + pc_info.type = VTD_PASID_CACHE_DOMSI; >> + pc_info.domain_id = domain_id; >> + break; >> + >> + case VTD_INV_DESC_PASIDC_PASID_SI: >> + /* PASID selective implies a DID selective */ >> + trace_vtd_pasid_cache_psi(domain_id, pasid); >> + pc_info.type = VTD_PASID_CACHE_PASIDSI; >> + pc_info.domain_id = domain_id; >> + pc_info.pasid = pasid; >> + break; >> + >> + case VTD_INV_DESC_PASIDC_GLOBAL: >> + trace_vtd_pasid_cache_gsi(); >> + pc_info.type = VTD_PASID_CACHE_GLOBAL_INV; >> + break; >> + >> + default: >> + error_report_once("invalid-inv-granu-in-pc_inv_desc hi: 0x%" PRIx64 >Make it end-user understandable? invalid granulrity field in PASID-cache >invalidate descriptor. Sure. Thanks Zhenzhong