Hi Zhenzhong,

On 6/20/25 9:18 AM, Zhenzhong Duan wrote:
> On a system influenced by ERRATA_772415, IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17
> is repored by IOMMU_DEVICE_GET_HW_INFO. Due to this errata, even the readonly
> range mapped on stage-2 page table could still be written.

I would split this patch into a vfio only patch and an iommu one that
sets bcontainer->readonly according to the fetched info.
>
> Reference from 4th Gen Intel Xeon Processor Scalable Family Specification
> Update, Errata Details, SPR17.
> https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
the link does not work for me.

Please could you explain in english what the errata is about and what
actions need to be taken care in VFIO?

Sorry I failed to understand "

Due to this errata, even the readonly
range mapped on stage-2 page table could still be written.

"
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
> ---
>  include/hw/vfio/vfio-container-base.h |  1 +
>  hw/vfio/iommufd.c                     |  8 +++++++-
>  hw/vfio/listener.c                    | 13 +++++++++----
>  3 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-container-base.h 
> b/include/hw/vfio/vfio-container-base.h
> index f0232654ee..e5c51a51ac 100644
> --- a/include/hw/vfio/vfio-container-base.h
> +++ b/include/hw/vfio/vfio-container-base.h
> @@ -51,6 +51,7 @@ typedef struct VFIOContainerBase {
>      QLIST_HEAD(, VFIODevice) device_list;
>      GList *iova_ranges;
>      NotifierWithReturn cpr_reboot_notifier;
> +    bool bypass_ro;
>  } VFIOContainerBase;
>  
>  typedef struct VFIOGuestIOMMU {
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 83a632bdee..23839a511a 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -306,6 +306,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice 
> *vbasedev,
>  {
>      ERRP_GUARD();
>      IOMMUFDBackend *iommufd = vbasedev->iommufd;
> +    struct iommu_hw_info_vtd vtd;
= {}; ?
>      uint32_t type, flags = 0;
>      uint64_t hw_caps;
>      VFIOIOASHwpt *hwpt;
> @@ -345,10 +346,15 @@ static bool iommufd_cdev_autodomains_get(VFIODevice 
> *vbasedev,
>       * instead.
>       */
>      if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
> -                                         &type, NULL, 0, &hw_caps, errp)) {
> +                                         &type, &vtd, sizeof(vtd), &hw_caps,
> +                                         errp)) {
>          return false;
>      }
>  
> +    if (vtd.flags & IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17) {
> +        container->bcontainer.bypass_ro = true;
> +    }
> +
>      if (hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>          flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>      }
> diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
> index f498e23a93..c64aa4539e 100644
> --- a/hw/vfio/listener.c
> +++ b/hw/vfio/listener.c
> @@ -364,7 +364,8 @@ static bool 
> vfio_known_safe_misalignment(MemoryRegionSection *section)
>      return true;
>  }
>  
> -static bool vfio_listener_valid_section(MemoryRegionSection *section,
> +static bool vfio_listener_valid_section(VFIOContainerBase *bcontainer,
> +                                        MemoryRegionSection *section,
>                                          const char *name)
>  {
>      if (vfio_listener_skipped_section(section)) {
> @@ -375,6 +376,10 @@ static bool 
> vfio_listener_valid_section(MemoryRegionSection *section,
>          return false;
>      }
>  
> +    if (bcontainer && bcontainer->bypass_ro && section->readonly) {
> +        return false;
> +    }
> +
>      if (unlikely((section->offset_within_address_space &
>                    ~qemu_real_host_page_mask()) !=
>                   (section->offset_within_region & 
> ~qemu_real_host_page_mask()))) {
> @@ -494,7 +499,7 @@ void vfio_container_region_add(VFIOContainerBase 
> *bcontainer,
>      int ret;
>      Error *err = NULL;
>  
> -    if (!vfio_listener_valid_section(section, "region_add")) {
> +    if (!vfio_listener_valid_section(bcontainer, section, "region_add")) {
>          return;
>      }
>  
> @@ -655,7 +660,7 @@ static void vfio_listener_region_del(MemoryListener 
> *listener,
>      int ret;
>      bool try_unmap = true;
>  
> -    if (!vfio_listener_valid_section(section, "region_del")) {
> +    if (!vfio_listener_valid_section(bcontainer, section, "region_del")) {
>          return;
>      }
>  
> @@ -812,7 +817,7 @@ static void vfio_dirty_tracking_update(MemoryListener 
> *listener,
>          container_of(listener, VFIODirtyRangesListener, listener);
>      hwaddr iova, end;
>  
> -    if (!vfio_listener_valid_section(section, "tracking_update") ||
> +    if (!vfio_listener_valid_section(NULL, section, "tracking_update") ||
>          !vfio_get_section_iova_range(dirty->bcontainer, section,
>                                       &iova, &end, NULL)) {
>          return;
Thanks

Eric


Reply via email to