Hi Eric,

>-----Original Message-----
>From: Eric Auger <[email protected]>
>Subject: Re: [PATCH v8 13/23] intel_iommu_accel: Bind/unbind guest page
>table to host
>
>Hi Zhenzhong,
>
>On 11/17/25 10:37 AM, Zhenzhong Duan wrote:
>> This captures the guest PASID table entry modifications and propagates
>> the changes to host to attach a hwpt with type determined per guest
>IOMMU
>> PGTT configuration.
>>
>> When PGTT=PT, attach PASID_0 to a second stage HWPT(GPA->HPA).
>> When PGTT=FST, attach PASID_0 to nested HWPT with nesting parent HWPT
>> coming from VFIO.
>>
>> Co-Authored-by: Yi Liu <[email protected]>
>> Signed-off-by: Yi Liu <[email protected]>
>> Signed-off-by: Yi Sun <[email protected]>
>> Signed-off-by: Zhenzhong Duan <[email protected]>
>> ---
>>  hw/i386/intel_iommu_accel.h   |   6 ++
>>  include/hw/i386/intel_iommu.h |   1 +
>>  hw/i386/intel_iommu.c         |  22 ++++++-
>>  hw/i386/intel_iommu_accel.c   | 114
>++++++++++++++++++++++++++++++++++
>>  hw/i386/trace-events          |   3 +
>>  5 files changed, 143 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>> index dbe6ee6982..1a396c50a0 100644
>> --- a/hw/i386/intel_iommu_accel.h
>> +++ b/hw/i386/intel_iommu_accel.h
>> @@ -16,6 +16,7 @@
>>  bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>*vtd_hiod,
>>                            Error **errp);
>>  VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as);
>> +bool vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
>>  #else
>>  static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
>>                                          VTDHostIOMMUDevice
>*vtd_hiod,
>> @@ -30,5 +31,10 @@ static inline VTDHostIOMMUDevice
>*vtd_find_hiod_iommufd(VTDAddressSpace *as)
>>  {
>>      return NULL;
>>  }
>> +
>> +static inline bool vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error
>**errp)
>> +{
>> +    return true;
>> +}
>>  #endif
>>  #endif
>> diff --git a/include/hw/i386/intel_iommu.h
>b/include/hw/i386/intel_iommu.h
>> index 401322665a..8ce8fe1b75 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -104,6 +104,7 @@ struct VTDAddressSpace {
>>      PCIBus *bus;
>>      uint8_t devfn;
>>      uint32_t pasid;
>> +    uint32_t fs_hwpt;
>nit use fs_hwpt_id

Will do.

>>      AddressSpace as;
>>      IOMMUMemoryRegion iommu;
>>      MemoryRegion root;          /* The root container of the device
>*/
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 513b2c85d4..36449bf161 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -88,7 +88,11 @@ static void
>vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>>      g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
>>      while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
>>          VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> -        pc_entry->valid = false;
>> +        if (pc_entry->valid) {
>> +            pc_entry->valid = false;
>> +            /* It's fatal to get failure during reset */
>could you elaborate on this. Why is it more fatal on reset that on other
>circumstances which do not abort?

During reset, s->dmar_enabled and pc_entry->valid are both false,
so vtd_bind_guest_pasid() is actually a call to
host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, errp)
which does attachment to default HWPT containing shadow page table.
This should never fail or else we have caught it in vfio_pci_realize().

For vtd_bind_guest_pasid() during pasid entry invalidation, the error can
come from wrong first stage config by guest, we should inject fault event
to guest instead of abort qemu, this is currently not implemented yet and
marked as TODO.

>> +            vtd_bind_guest_pasid(vtd_as, &error_fatal);
>> +        }
>>      }
>>  }
>>
>> @@ -3074,6 +3078,8 @@ static void
>vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>>      VTDPASIDEntry pe;
>>      IOMMUNotifier *n;
>>      uint16_t did;
>> +    const char *err_prefix = "Attaching to HWPT failed: ";
>> +    Error *local_err = NULL;
>>
>>      if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>>          if (!pc_entry->valid) {
>> @@ -3094,7 +3100,9 @@ static void
>vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>>              vtd_address_space_unmap(vtd_as, n);
>>          }
>>          vtd_switch_address_space(vtd_as);
>> -        return;
>> +
>> +        err_prefix = "Detaching from HWPT failed: ";
>> +        goto do_bind_unbind;
>>      }
>>
>>      /*
>> @@ -3122,12 +3130,20 @@ static void
>vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>>      if (!pc_entry->valid) {
>>          pc_entry->pasid_entry = pe;
>>          pc_entry->valid = true;
>> -    } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
>> +    } else if (vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
>> +        err_prefix = "Replacing HWPT attachment failed: ";
>> +    } else {
>>          return;
>>      }
>>
>>      vtd_switch_address_space(vtd_as);
>>      vtd_address_space_sync(vtd_as);
>> +
>> +do_bind_unbind:
>> +    /* TODO: Fault event injection into guest, report error to QEMU for
>now */
>> +    if (!vtd_bind_guest_pasid(vtd_as, &local_err)) {
>> +        error_reportf_err(local_err, "%s", err_prefix);
>> +    }
>>  }
>>
>>  static void vtd_pasid_cache_sync(IntelIOMMUState *s,
>VTDPASIDCacheInfo *pc_info)
>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>> index ebfc503d64..66570ea919 100644
>> --- a/hw/i386/intel_iommu_accel.c
>> +++ b/hw/i386/intel_iommu_accel.c
>> @@ -13,6 +13,7 @@
>>  #include "intel_iommu_internal.h"
>>  #include "intel_iommu_accel.h"
>>  #include "hw/pci/pci_bus.h"
>> +#include "trace.h"
>>
>>  bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>*vtd_hiod,
>>                            Error **errp)
>> @@ -68,3 +69,116 @@ VTDHostIOMMUDevice
>*vtd_find_hiod_iommufd(VTDAddressSpace *as)
>>      }
>>      return NULL;
>>  }
>> +
>> +static bool vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
>> +                               VTDPASIDEntry *pe, uint32_t
>*fs_hwpt,
>> +                               Error **errp)
>> +{
>> +    struct iommu_hwpt_vtd_s1 vtd = {};
>> +
>> +    vtd.flags = (VTD_SM_PASID_ENTRY_SRE(pe) ? IOMMU_VTD_S1_SRE :
>0) |
>> +                (VTD_SM_PASID_ENTRY_WPE(pe) ?
>IOMMU_VTD_S1_WPE : 0) |
>> +                (VTD_SM_PASID_ENTRY_EAFE(pe) ?
>IOMMU_VTD_S1_EAFE : 0);
>> +    vtd.addr_width = vtd_pe_get_fs_aw(pe);
>> +    vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
>> +
>> +    return iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
>idev->hwpt_id,
>> +                                      0,
>IOMMU_HWPT_DATA_VTD_S1, sizeof(vtd),
>> +                                      &vtd, fs_hwpt, errp);
>> +}
>> +
>> +static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
>> +                                    VTDAddressSpace *vtd_as)
>> +{
>> +    if (!vtd_as->fs_hwpt) {
>> +        return;
>> +    }
>> +    iommufd_backend_free_id(idev->iommufd, vtd_as->fs_hwpt);
>> +    vtd_as->fs_hwpt = 0;
>is it a valid assumption a valid ID cannot be null? Is it documented
>somewhere?

I didn't find it in uAPI doc, but it's hard coded in kernel:

xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);

Hi @[email protected], @[email protected] do you have a guideline on this?
Could I take zero id reserved?

>> +}
>> +
>> +static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice
>*vtd_hiod,
>> +                                      VTDAddressSpace *vtd_as,
>Error **errp)
>> +{
>> +    HostIOMMUDeviceIOMMUFD *idev =
>HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
>> +    VTDPASIDEntry *pe = &vtd_as->pasid_cache_entry.pasid_entry;
>> +    uint32_t hwpt_id;
>nit init to idev->hwpt_id

Will do.

>> +    bool ret;
>> +
>> +    /*
>> +     * We can get here only if flts=on, the supported PGTT is FST or PT.
>> +     * Catch invalid PGTT when processing invalidation request to avoid
>> +     * attaching to wrong hwpt.
>> +     */
>> +    if (!vtd_pe_pgtt_is_fst(pe) && !vtd_pe_pgtt_is_pt(pe)) {
>> +        error_setg(errp, "Invalid PGTT type %d",
>> +                   (uint8_t)VTD_SM_PASID_ENTRY_PGTT(pe));
>> +        return false;
>> +    }
>> +
>> +    if (vtd_pe_pgtt_is_pt(pe)) {
>> +        hwpt_id = idev->hwpt_id;
>> +    } else if (!vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp)) {
>> +        return false;
>> +    }
>and
>
>if (vtd_pe_pgtt_is_fst(pe)) {
>    if ((!vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp) {
>        return false;
>    }
>
>}
>this will match free block below

Will do

>> +
>> +    ret = host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id,
>errp);
>> +    trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id,
>ret);
>> +    if (ret) {
>> +        /* Destroy old fs_hwpt if it's a replacement */
>> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
>> +        if (vtd_pe_pgtt_is_fst(pe)) {
>> +            vtd_as->fs_hwpt = hwpt_id;
>> +        }
>> +    } else if (vtd_pe_pgtt_is_fst(pe)) {
>> +        iommufd_backend_free_id(idev->iommufd, hwpt_id);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice
>*vtd_hiod,
>> +                                      VTDAddressSpace *vtd_as,
>Error **errp)
>> +{
>> +    HostIOMMUDeviceIOMMUFD *idev =
>HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>> +    uint32_t pasid = vtd_as->pasid;
>> +    bool ret;
>> +
>> +    if (s->dmar_enabled && s->root_scalable) {
>> +        ret = host_iommu_device_iommufd_detach_hwpt(idev, errp);
>> +        trace_vtd_device_detach_hwpt(idev->devid, pasid, ret);
>> +    } else {
>> +        /*
>> +         * If DMAR remapping is disabled or guest switches to legacy
>mode,
>> +         * we fallback to the default HWPT which contains shadow page
>table.
>> +         * So guest DMA could still work.
>> +         */
>> +        ret = host_iommu_device_iommufd_attach_hwpt(idev,
>idev->hwpt_id, errp);
>> +        trace_vtd_device_reattach_def_hwpt(idev->devid, pasid,
>idev->hwpt_id,
>> +                                           ret);
>> +    }
>> +
>> +    if (ret) {
>> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +bool vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
>I find it a little bit confusing to have bind doing the unbind too.
>at least I would rename into vtd_propagate_guest_pasid or something alike.

Indeed, I had ever thought to use vtd_bind_unbind_guest_pasid...
vtd_propagate_guest_pasid sounds better, will use it.

Thanks
Zhenzhong

Reply via email to