Hi Eric,

>-----Original Message-----
>From: Eric Auger <eric.au...@redhat.com>
>Subject: Re: [PATCH v2 10/19] intel_iommu: Handle PASID entry removing and
>updating
>
>Hi Zhenzhong,
>
>On 6/20/25 9:18 AM, Zhenzhong Duan wrote:
>I would suggest: Handle PASID entry removal and update instead of verbing.
>> This adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the
>> pasid entry and track PASID usage and future PASID tagged DMA address
>> translation support in vIOMMU.
>>
>> VTDAddressSpace of PCI_NO_PASID is allocated when device is plugged and
>> never freed. For other pasid, VTDAddressSpace instance is created/destroyed
>> per the guest pasid entry set up/destroy for passthrough devices. While for
>> emulated devices, VTDAddressSpace instance is created in the PASID tagged
>DMA
>> translation and be destroyed per guest PASID cache invalidation. This focuses
>s/be detroyed/destroyed
>> on the PASID cache management for passthrough devices as there is no PASID
>> capable emulated devices yet.
>if you don't handle the emulated device case, may be omit talking about
>them here.

OK, will drop the sentence starting from "While for emulated devices..."

>>
>> When guest modifies a PASID entry, QEMU will capture the guest pasid
>selective
>> pasid cache invalidation, allocate or remove a VTDAddressSpace instance per
>the
>> invalidation reasons:
>>
>>     a) a present pasid entry moved to non-present
>>     b) a present pasid entry to be a present entry
>>     c) a non-present pasid entry moved to present
>>
>> This handles a) and b), following patch will handle c).
>This -> This patch

Will do.

>>
>> vIOMMU emulator could figure out the reason by fetching latest guest pasid
>entry
>> and compare it with the PASID cache.
>>
>> Signed-off-by: Yi Liu <yi.l....@intel.com>
>> Signed-off-by: Yi Sun <yi.y....@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
>> ---
>>  hw/i386/intel_iommu_internal.h |  27 ++++
>>  include/hw/i386/intel_iommu.h  |   6 +
>>  hw/i386/intel_iommu.c          | 265 +++++++++++++++++++++++++++++++--
>>  hw/i386/trace-events           |   3 +
>>  4 files changed, 291 insertions(+), 10 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index 18bc22fc72..01c881ed4d 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -315,6 +315,7 @@ typedef enum VTDFaultReason {
>>                                    * request while disabled */
>>      VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>>
>> +    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
>>      /* PASID directory entry access failure */
>>      VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>>      /* The Present(P) field of pasid directory entry is 0 */
>> @@ -492,6 +493,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>  #define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
>>  #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>>
>Adding some basic comments for the uncultured reader like me would help
>doing the link with the vtd spec:
>PASID-cache Invalidate Descriptor (pc_inv_dsc) fields

Will do.

>> +#define VTD_INV_DESC_PASIDC_G          (3ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_PASID(val) (((val) >> 32) & 0xfffffULL)
>> +#define VTD_INV_DESC_PASIDC_DID(val)   (((val) >> 16) &
>VTD_DOMAIN_ID_MASK)
>> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0  0xfff000000000f1c0ULL
>> +
>> +#define VTD_INV_DESC_PASIDC_DSI        (0ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_PASID_SI   (1ULL << 4)
>> +#define VTD_INV_DESC_PASIDC_GLOBAL     (3ULL << 4)
>as those are values for the granularity field using
>
>VTD_INV_DESC_PASIDC_G_* look relevant to me.

Will do.

>
>
>I think you would gain in readability if you adopt extract32/64 syntax
>like in hw/arm/smmuv3-internal.h
>Looks more readable to me.

This is different from existing style in intel_iommu_internal.h
But I do agree with your suggestions, let me try it with this series.

>> +
>>  /* Information about page-selective IOTLB invalidate */
>>  struct VTDIOTLBPageInvInfo {
>>      uint16_t domain_id;
>> @@ -552,6 +562,22 @@ typedef struct VTDRootEntry VTDRootEntry;
>>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>~VTD_HAW_MASK(aw))
>>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>>
>> +typedef enum VTDPCInvType {
>> +    /* pasid cache invalidation rely on guest PASID entry */
>> +    VTD_PASID_CACHE_GLOBAL_INV, /* pasid cache global invalidation */
>> +    VTD_PASID_CACHE_DOMSI,      /* pasid cache domain selective invalidation
>*/
>> +    VTD_PASID_CACHE_PASIDSI,    /* pasid cache pasid selective invalidation 
>> */
>> +} VTDPCInvType;
>> +
>> +typedef struct VTDPASIDCacheInfo {
>> +    VTDPCInvType type;
>> +    uint16_t domain_id;
>did?

OK

>> +    uint32_t pasid;
>> +    PCIBus *bus;
>> +    uint16_t devfn;
>> +    bool error_happened;
>wouldn't it make more sense to store an Error * directly so that you can
>fill it whenever there is an error occuring.
>Something like VFIOContainerBase::error

Will do

>> +} VTDPASIDCacheInfo;
>> +
>>  /* PASID Table Related Definitions */
>>  #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
>>  #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
>> @@ -563,6 +589,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>  #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
>>  #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) &
>VTD_PASID_TABLE_BITS_MASK)
>>  #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing 
>> Disable
>*/
>> +#define VTD_PASID_TBL_ENTRY_NUM       (1ULL << 6)
>>
>>  /* PASID Granular Translation Type Mask */
>>  #define VTD_PASID_ENTRY_P              1ULL
>> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
>> index 50f9b27a45..fbc9da903a 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>>      uint64_t val[8];
>>  };
>>
>> +typedef struct VTDPASIDCacheEntry {
>> +    struct VTDPASIDEntry pasid_entry;
>> +    bool cache_filled;
>> +} VTDPASIDCacheEntry;
>> +
>>  struct VTDAddressSpace {
>>      PCIBus *bus;
>>      uint8_t devfn;
>> @@ -107,6 +112,7 @@ struct VTDAddressSpace {
>>      MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */
>>      IntelIOMMUState *iommu_state;
>>      VTDContextCacheEntry context_cache_entry;
>> +    VTDPASIDCacheEntry pasid_cache_entry;
>>      QLIST_ENTRY(VTDAddressSpace) next;
>>      /* Superset of notifier flags that this address space has */
>>      IOMMUNotifierFlag notifier_flags;
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 8948b8370f..1db581d14a 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -826,11 +826,24 @@ static inline bool
>vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
>>      }
>>  }
>>
>> +static inline uint16_t vtd_pe_get_did(VTDPASIDEntry *pe)
>> +{
>> +    return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
>see my previous comment about extract*
>> +}
>> +
>>  static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
>>  {
>>      return pdire->val & 1;
>>  }
>>
>> +static inline void pasid_cache_info_set_error(VTDPASIDCacheInfo *pc_info)
>not sure whether this helper is really needed
>> +{
>> +    if (pc_info->error_happened) {
>Besides why not simply overwriting unconditionnally?

I will follow VFIO code to save first Error and drop the following ones.

>> +        return;
>> +    }
>> +    pc_info->error_happened = true;
>> +}
>> +
>>  /**
>>   * Caller of this function should check present bit if wants
>>   * to use pdir entry for further usage except for fpd bit check.
>> @@ -3103,6 +3116,241 @@ static bool
>vtd_process_piotlb_desc(IntelIOMMUState *s,
>>      return true;
>>  }
>>
>> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
>> +                                            uint32_t pasid, VTDPASIDEntry 
>> *pe)
>> +{
>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>> +    VTDContextEntry ce;
>> +    int ret;
>> +
>> +    if (!s->root_scalable) {
>> +        return -VTD_FR_RTADDR_INV_TTM;
>> +    }
>> +
>> +    ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as-
>>devfn,
>> +                                   &ce);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +
>> +    return vtd_ce_get_pasid_entry(s, &ce, pe, pasid);
>> +}
>> +
>> +static bool vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>> +{
>> +    return !memcmp(p1, p2, sizeof(*p1));
>> +}
>> +
>> +/*
>> + * This function fills in the pasid entry in &vtd_as. Caller
>> + * of this function should hold iommu_lock.
>> + */
>> +static int vtd_fill_pe_in_cache(IntelIOMMUState *s, VTDAddressSpace *vtd_as,
>> +                                VTDPASIDEntry *pe)
>> +{
>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> +
>> +    if (vtd_pasid_entry_compare(pe, &pc_entry->pasid_entry)) {
>> +        /* No need to go further as cached pasid entry is latest */
>> +        return 0;
>> +    }
>> +
>> +    pc_entry->pasid_entry = *pe;
>> +    pc_entry->cache_filled = true;
>> +
>> +    /*
>> +     * TODO: send pasid bind to host for passthru devices
>> +     */
>> +
>> +    return 0;
>> +}
>> +
>> +/*
>> + * This function is used to update or clear cached pasid entry in vtd_as
>> + * instances. Caller of this function should hold iommu_lock.
>> + */
>> +static gboolean vtd_flush_pasid(gpointer key, gpointer value,
>> +                                gpointer user_data)
>> +{
>> +    VTDPASIDCacheInfo *pc_info = user_data;
>> +    VTDAddressSpace *vtd_as = value;
>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> +    VTDPASIDEntry pe;
>> +    uint16_t did;
>> +    uint32_t pasid;
>> +    int ret;
>> +
>> +    if (!pc_entry->cache_filled) {
>> +        return false;
>> +    }
>> +    did = vtd_pe_get_did(&pc_entry->pasid_entry);
>> +
>> +    if (vtd_as_to_iommu_pasid_locked(vtd_as, &pasid)) {
>> +        goto remove;
>> +    }
>> +
>> +    switch (pc_info->type) {
>> +    case VTD_PASID_CACHE_PASIDSI:
>> +        if (pc_info->pasid != pasid) {
>> +            return false;
>> +        }
>> +        /* Fall through */
>> +    case VTD_PASID_CACHE_DOMSI:
>> +        if (pc_info->domain_id != did) {
>> +            return false;
>> +        }
>> +        /* Fall through */
>> +    case VTD_PASID_CACHE_GLOBAL_INV:
>> +        break;
>> +    default:
>> +        error_report("invalid pc_info->type");
>> +        abort();
>> +    }
>> +
>> +    /*
>> +     * pasid cache invalidation may indicate a present pasid
>> +     * entry to present pasid entry modification. To cover such
>> +     * case, vIOMMU emulator needs to fetch latest guest pasid
>> +     * entry and check cached pasid entry, then update pasid
>> +     * cache and send pasid bind/unbind to host properly.
>if you don't do that in that patch I would put that in a subsequent
>patch. Focus on the PASID cache in this patch. See my subsequent comment

Sure.

>> +     */
>> +    ret = vtd_dev_get_pe_from_pasid(vtd_as, pasid, &pe);
>> +    if (ret) {
>> +        /*
>> +         * No valid pasid entry in guest memory. e.g. pasid entry
>> +         * was modified to be either all-zero or non-present. Either
>> +         * case means existing pasid cache should be removed.
>> +         */
>> +        goto remove;
>> +    }
>> +
>> +    if (vtd_fill_pe_in_cache(s, vtd_as, &pe)) {
>> +        pasid_cache_info_set_error(pc_info);
>> +    }
>> +    return false;
>> +
>> +remove:
>> +    /*
>> +     * TODO: send pasid unbind to host for passthru devices
>> +     */
>> +    pc_entry->cache_filled = false;
>> +
>> +    /*
>> +     * Don't remove address space of PCI_NO_PASID which is created by PCI
>> +     * sub-system.
>> +     */
>> +    if (vtd_as->pasid == PCI_NO_PASID) {
>> +        return false;
>> +    }
>> +    return true;
>> +}
>> +
>> +/*
>> + * This function syncs the pasid bindings between guest and host.
>> + * It includes updating the pasid cache in vIOMMU and updating the
>> + * pasid bindings per guest's latest pasid entry presence.
>> + */
>> +static void vtd_pasid_cache_sync(IntelIOMMUState *s,
>> +                                 VTDPASIDCacheInfo *pc_info)
>> +{
>> +    if (!s->flts || !s->root_scalable || !s->dmar_enabled) {
>you don't update error_happened. Is that OK?

This is the case when no pasid cache used but guest sends pasid cache
invalidation request, there is no error, so just ignore this request.

>> +        return;
>> +    }
>> +
>> +    /*
>> +     * Regards to a pasid cache invalidation, e.g. a PSI.
>> +     * it could be either cases of below:
>> +     * a) a present pasid entry moved to non-present
>> +     * b) a present pasid entry to be a present entry
>> +     * c) a non-present pasid entry moved to present
>> +     *
>> +     * Different invalidation granularity may affect different device
>> +     * scope and pasid scope. But for each invalidation granularity,
>> +     * it needs to do two steps to sync host and guest pasid binding.
>> +     *
>> +     * Here is the handling of a PSI:
>> +     * 1) loop all the existing vtd_as instances to update them
>> +     *    according to the latest guest pasid entry in pasid table.
>> +     *    this will make sure affected existing vtd_as instances
>> +     *    cached the latest pasid entries. Also, during the loop, the
>> +     *    host should be notified if needed. e.g. pasid unbind or pasid
>> +     *    update. Should be able to cover case a) and case b).
>> +     *
>> +     * 2) loop all devices to cover case c)
>> +     *    - For devices which are backed by HostIOMMUDeviceIOMMUFD
>instances,
>> +     *      we loop them and check if guest pasid entry exists. If yes,
>> +     *      it is case c), we update the pasid cache and also notify
>> +     *      host.
>> +     *    - For devices which are not backed by HostIOMMUDeviceIOMMUFD,
>> +     *      it is not necessary to create pasid cache at this phase since
>> +     *      it could be created when vIOMMU does DMA address translation.
>> +     *      This is not yet implemented since there is no emulated
>> +     *      pasid-capable devices today. If we have such devices in
>> +     *      future, the pasid cache shall be created there.
>> +     * Other granularity follow the same steps, just with different scope
>I would put all the stuff related to interactions with host in a
>subsequent patch. This patch could concentrate on the IOMMU PASID cache
>only. And then you would add the extra complexity of syncs with the
>host. I think it would simplify the review.

Sure.

>> +     *
>> +     */
>> +
>> +    vtd_iommu_lock(s);
>> +    /*
>> +     * Step 1: loop all the existing vtd_as instances for pasid unbind and
>> +     * update.
>> +     */
>> +    g_hash_table_foreach_remove(s->vtd_address_spaces, vtd_flush_pasid,
>> +                                pc_info);
>> +    vtd_iommu_unlock(s);
>> +
>> +    /* TODO: Step 2: loop all the existing vtd_hiod instances for pasid 
>> bind. */
>> +}
>> +
>> +static bool vtd_process_pasid_desc(IntelIOMMUState *s,
>> +                                   VTDInvDesc *inv_desc)
>> +{
>> +    uint16_t domain_id;
>> +    uint32_t pasid;
>> +    VTDPASIDCacheInfo pc_info = {};
>> +    uint64_t mask[4] = {VTD_INV_DESC_PASIDC_RSVD_VAL0,
>VTD_INV_DESC_ALL_ONE,
>> +                        VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
>> +
>> +    if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true,
>> +                                     __func__, "pasid cache inv")) {
>> +        return false;
>> +    }
>> +
>> +    domain_id = VTD_INV_DESC_PASIDC_DID(inv_desc->val[0]);
>> +    pasid = VTD_INV_DESC_PASIDC_PASID(inv_desc->val[0]);
>> +
>> +    switch (inv_desc->val[0] & VTD_INV_DESC_PASIDC_G) {
>> +    case VTD_INV_DESC_PASIDC_DSI:
>> +        trace_vtd_pasid_cache_dsi(domain_id);
>> +        pc_info.type = VTD_PASID_CACHE_DOMSI;
>> +        pc_info.domain_id = domain_id;
>> +        break;
>> +
>> +    case VTD_INV_DESC_PASIDC_PASID_SI:
>> +        /* PASID selective implies a DID selective */
>> +        trace_vtd_pasid_cache_psi(domain_id, pasid);
>> +        pc_info.type = VTD_PASID_CACHE_PASIDSI;
>> +        pc_info.domain_id = domain_id;
>> +        pc_info.pasid = pasid;
>> +        break;
>> +
>> +    case VTD_INV_DESC_PASIDC_GLOBAL:
>> +        trace_vtd_pasid_cache_gsi();
>> +        pc_info.type = VTD_PASID_CACHE_GLOBAL_INV;
>> +        break;
>> +
>> +    default:
>> +        error_report_once("invalid-inv-granu-in-pc_inv_desc hi: 0x%" PRIx64
>Make it end-user understandable? invalid granulrity field in PASID-cache
>invalidate descriptor.

Sure.

Thanks
Zhenzhong

Reply via email to