>-----Original Message-----
>From: Eric Auger <eric.au...@redhat.com>
>Subject: Re: [PATCH v2 18/19] Workaround for ERRATA_772415_SPR17
>
>Hi Zhenzhong,
>
>On 6/20/25 9:18 AM, Zhenzhong Duan wrote:
>> On a system influenced by ERRATA_772415,
>IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17
>> is repored by IOMMU_DEVICE_GET_HW_INFO. Due to this errata, even the
>readonly
>> range mapped on stage-2 page table could still be written.
>
>I would split this patch into a vfio only patch and an iommu one that
>sets bcontainer->readonly according to the fetched info.

Will do.

>>
>> Reference from 4th Gen Intel Xeon Processor Scalable Family Specification
>> Update, Errata Details, SPR17.
>> https://www.intel.com/content/www/us/en/content-details/772415/content-
>details.html
>the link does not work for me.

That's strange, what about:

https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/eagle-stream/sapphire-rapids-specification-update/

>
>Please could you explain in english what the errata is about and what
>actions need to be taken care in VFIO?
>
>Sorry I failed to understand "
>
>Due to this errata, even the readonly
>range mapped on stage-2 page table could still be written.

Copy the explanation in above link:

"SPR17. Remapping Hardware May Set Access/Dirty Bits in a First-stage
Page-table Entry

Problem: When remapping hardware is configured by system software in
scalable mode as Nested (PGTT=011b) and with PWSNP field Set in the
PASID-table-entry, it may Set Accessed bit and Dirty bit (and Extended
Access bit if enabled) in first-stage page-table entries even when
second-stage mappings indicate that corresponding first-stage page-table
is Read-Only.

Implication: Due to this erratum, pages mapped as Read-only in second-stage
page-tables may be modified by remapping hardware Access/Dirty bit updates.

Workaround: None identified. System software enabling nested translations
for a VM should ensure that there are no read-only pages in the
corresponding second-stage mappings."

>
>"
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
>> ---
>>  include/hw/vfio/vfio-container-base.h |  1 +
>>  hw/vfio/iommufd.c                     |  8 +++++++-
>>  hw/vfio/listener.c                    | 13 +++++++++----
>>  3 files changed, 17 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-
>container-base.h
>> index f0232654ee..e5c51a51ac 100644
>> --- a/include/hw/vfio/vfio-container-base.h
>> +++ b/include/hw/vfio/vfio-container-base.h
>> @@ -51,6 +51,7 @@ typedef struct VFIOContainerBase {
>>      QLIST_HEAD(, VFIODevice) device_list;
>>      GList *iova_ranges;
>>      NotifierWithReturn cpr_reboot_notifier;
>> +    bool bypass_ro;
>>  } VFIOContainerBase;
>>
>>  typedef struct VFIOGuestIOMMU {
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 83a632bdee..23839a511a 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -306,6 +306,7 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>  {
>>      ERRP_GUARD();
>>      IOMMUFDBackend *iommufd = vbasedev->iommufd;
>> +    struct iommu_hw_info_vtd vtd;
>= {}; ?

Will do.

>>      uint32_t type, flags = 0;
>>      uint64_t hw_caps;
>>      VFIOIOASHwpt *hwpt;
>> @@ -345,10 +346,15 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>       * instead.
>>       */
>>      if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev-
>>devid,
>> -                                         &type, NULL, 0, &hw_caps, errp)) {
>> +                                         &type, &vtd, sizeof(vtd), &hw_caps,
>> +                                         errp)) {
>>          return false;
>>      }
>>
>> +    if (vtd.flags & IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17) {
>> +        container->bcontainer.bypass_ro = true;
>> +    }
>> +
>>      if (hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>>          flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>      }
>> diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
>> index f498e23a93..c64aa4539e 100644
>> --- a/hw/vfio/listener.c
>> +++ b/hw/vfio/listener.c
>> @@ -364,7 +364,8 @@ static bool
>vfio_known_safe_misalignment(MemoryRegionSection *section)
>>      return true;
>>  }
>>
>> -static bool vfio_listener_valid_section(MemoryRegionSection *section,
>> +static bool vfio_listener_valid_section(VFIOContainerBase *bcontainer,
>> +                                        MemoryRegionSection *section,
>>                                          const char *name)
>>  {
>>      if (vfio_listener_skipped_section(section)) {
>> @@ -375,6 +376,10 @@ static bool
>vfio_listener_valid_section(MemoryRegionSection *section,
>>          return false;
>>      }
>>
>> +    if (bcontainer && bcontainer->bypass_ro && section->readonly) {
>> +        return false;
>> +    }
>> +
>>      if (unlikely((section->offset_within_address_space &
>>                    ~qemu_real_host_page_mask()) !=
>>                   (section->offset_within_region & 
>> ~qemu_real_host_page_mask()))) {
>> @@ -494,7 +499,7 @@ void vfio_container_region_add(VFIOContainerBase
>*bcontainer,
>>      int ret;
>>      Error *err = NULL;
>>
>> -    if (!vfio_listener_valid_section(section, "region_add")) {
>> +    if (!vfio_listener_valid_section(bcontainer, section, "region_add")) {
>>          return;
>>      }
>>
>> @@ -655,7 +660,7 @@ static void vfio_listener_region_del(MemoryListener
>*listener,
>>      int ret;
>>      bool try_unmap = true;
>>
>> -    if (!vfio_listener_valid_section(section, "region_del")) {
>> +    if (!vfio_listener_valid_section(bcontainer, section, "region_del")) {
>>          return;
>>      }
>>
>> @@ -812,7 +817,7 @@ static void vfio_dirty_tracking_update(MemoryListener
>*listener,
>>          container_of(listener, VFIODirtyRangesListener, listener);
>>      hwaddr iova, end;
>>
>> -    if (!vfio_listener_valid_section(section, "tracking_update") ||
>> +    if (!vfio_listener_valid_section(NULL, section, "tracking_update") ||
>>          !vfio_get_section_iova_range(dirty->bcontainer, section,
>>                                       &iova, &end, NULL)) {
>>          return;
>Thanks
>
>Eric

Reply via email to