On a system influenced by ERRATA_772415, IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 is repored by IOMMU_DEVICE_GET_HW_INFO. Due to this errata, even the readonly range mapped on stage-2 page table could still be written.
Reference from 4th Gen Intel Xeon Processor Scalable Family Specification Update, Errata Details, SPR17. https://www.intel.com/content/www/us/en/content-details/772415/content-details.html Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> --- include/hw/vfio/vfio-container-base.h | 1 + hw/vfio/iommufd.c | 8 +++++++- hw/vfio/listener.c | 13 +++++++++---- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index f0232654ee..e5c51a51ac 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -51,6 +51,7 @@ typedef struct VFIOContainerBase { QLIST_HEAD(, VFIODevice) device_list; GList *iova_ranges; NotifierWithReturn cpr_reboot_notifier; + bool bypass_ro; } VFIOContainerBase; typedef struct VFIOGuestIOMMU { diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 83a632bdee..23839a511a 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -306,6 +306,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, { ERRP_GUARD(); IOMMUFDBackend *iommufd = vbasedev->iommufd; + struct iommu_hw_info_vtd vtd; uint32_t type, flags = 0; uint64_t hw_caps; VFIOIOASHwpt *hwpt; @@ -345,10 +346,15 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, * instead. */ if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid, - &type, NULL, 0, &hw_caps, errp)) { + &type, &vtd, sizeof(vtd), &hw_caps, + errp)) { return false; } + if (vtd.flags & IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17) { + container->bcontainer.bypass_ro = true; + } + if (hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) { flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; } diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c index f498e23a93..c64aa4539e 100644 --- a/hw/vfio/listener.c +++ b/hw/vfio/listener.c @@ -364,7 +364,8 @@ static bool vfio_known_safe_misalignment(MemoryRegionSection *section) return true; } -static bool vfio_listener_valid_section(MemoryRegionSection *section, +static bool vfio_listener_valid_section(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, const char *name) { if (vfio_listener_skipped_section(section)) { @@ -375,6 +376,10 @@ static bool vfio_listener_valid_section(MemoryRegionSection *section, return false; } + if (bcontainer && bcontainer->bypass_ro && section->readonly) { + return false; + } + if (unlikely((section->offset_within_address_space & ~qemu_real_host_page_mask()) != (section->offset_within_region & ~qemu_real_host_page_mask()))) { @@ -494,7 +499,7 @@ void vfio_container_region_add(VFIOContainerBase *bcontainer, int ret; Error *err = NULL; - if (!vfio_listener_valid_section(section, "region_add")) { + if (!vfio_listener_valid_section(bcontainer, section, "region_add")) { return; } @@ -655,7 +660,7 @@ static void vfio_listener_region_del(MemoryListener *listener, int ret; bool try_unmap = true; - if (!vfio_listener_valid_section(section, "region_del")) { + if (!vfio_listener_valid_section(bcontainer, section, "region_del")) { return; } @@ -812,7 +817,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, container_of(listener, VFIODirtyRangesListener, listener); hwaddr iova, end; - if (!vfio_listener_valid_section(section, "tracking_update") || + if (!vfio_listener_valid_section(NULL, section, "tracking_update") || !vfio_get_section_iova_range(dirty->bcontainer, section, &iova, &end, NULL)) { return; -- 2.34.1