On 2025/11/28 10:08, Duan, Zhenzhong wrote:
Hi Yi, Cedric,

Could you also help comment on this patch? This is a pure VFIO migration related 
optimization, I think it's better to let it go with the "vfio: relax the vIOMMU 
check" series.
I'd like to move it in next respin of "vfio: relax the vIOMMU check" series if 
you think it make sense.

It makes sense to me.

Thanks
Zhenzhong

-----Original Message-----
From: Duan, Zhenzhong <[email protected]>
Subject: [PATCH v8 17/23] vfio/listener: Bypass readonly region for dirty
tracking

When doing ditry tracking or calculating dirty tracking range, readonly

s/ditry/dirty/

regions can be bypassed, because corresponding DMA mappings are readonly
and never become dirty.

This can optimize dirty tracking a bit for passthrough device.

Signed-off-by: Zhenzhong Duan <[email protected]>
---
hw/vfio/listener.c   | 46 +++++++++++++++++++++++++++++++++-----------
hw/vfio/trace-events |  1 +
2 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index 3b48f6796c..ca2377d860 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -76,8 +76,13 @@ static bool vfio_log_sync_needed(const VFIOContainer
*bcontainer)
     return true;
}

-static bool vfio_listener_skipped_section(MemoryRegionSection *section)
+static bool vfio_listener_skipped_section(MemoryRegionSection *section,
+                                          bool bypass_ro)
{
+    if (bypass_ro && section->readonly) {
+        return true;
+    }
+
     return (!memory_region_is_ram(section->mr) &&
             !memory_region_is_iommu(section->mr)) ||
            memory_region_is_protected(section->mr) ||
@@ -368,9 +373,9 @@ static bool
vfio_known_safe_misalignment(MemoryRegionSection *section)
}

static bool vfio_listener_valid_section(MemoryRegionSection *section,
-                                        const char *name)
+                                        bool bypass_ro, const char
*name)
{
-    if (vfio_listener_skipped_section(section)) {
+    if (vfio_listener_skipped_section(section, bypass_ro)) {
         trace_vfio_listener_region_skip(name,
                 section->offset_within_address_space,
                 section->offset_within_address_space +
@@ -497,7 +502,7 @@ void vfio_container_region_add(VFIOContainer
*bcontainer,
     int ret;
     Error *err = NULL;

-    if (!vfio_listener_valid_section(section, "region_add")) {
+    if (!vfio_listener_valid_section(section, false, "region_add")) {
         return;
     }

@@ -663,7 +668,7 @@ static void vfio_listener_region_del(MemoryListener
*listener,
     int ret;
     bool try_unmap = true;

-    if (!vfio_listener_valid_section(section, "region_del")) {
+    if (!vfio_listener_valid_section(section, false, "region_del")) {
         return;
     }

@@ -722,11 +727,11 @@ static void
vfio_listener_region_del(MemoryListener *listener,
         }

         /*
-         * Fake an IOTLB entry for identity mapping which is needed by
dirty
-         * tracking. In fact, in unmap_bitmap, only translated_addr field is
-         * used to set dirty bitmap.
+         * Fake an IOTLB entry for writable identity mapping which is
needed
+         * by dirty tracking. In fact, in unmap_bitmap, only
translated_addr
+         * field is used to set dirty bitmap.
          */
-        if (!memory_region_is_iommu(section->mr)) {
+        if (!memory_region_is_iommu(section->mr)
&& !section->readonly) {
             entry.iova = iova;
             entry.translated_addr = iova;
             iotlb = &entry;
@@ -834,7 +839,8 @@ static void
vfio_dirty_tracking_update(MemoryListener *listener,
         container_of(listener, VFIODirtyRangesListener, listener);
     hwaddr iova, end;

-    if (!vfio_listener_valid_section(section, "tracking_update") ||
+    /* Bypass readonly section as it never becomes dirty */
+    if (!vfio_listener_valid_section(section, true, "tracking_update") ||
         !vfio_get_section_iova_range(dirty->bcontainer, section,
                                      &iova, &end, NULL)) {
         return;
@@ -1093,6 +1099,19 @@ static void
vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
     if (!mr) {
         goto out_unlock;
     }
+
+    /*
+     * The mapping is readonly when either it's a readonly mapping in guest
+     * or mapped target is readonly, bypass it for dirty tracking as it
+     * never becomes dirty.
+     */
+    if (!(iotlb->perm & IOMMU_WO) || mr->readonly) {

is it possible that guest maps RW, while the backend mr is readonly?

+        trace_vfio_iommu_map_dirty_notify_skip_ro(iova,
+                                                  iova +
iotlb->addr_mask);
+        rcu_read_unlock();
+        return;
+    }
+
     translated_addr = memory_region_get_ram_addr(mr) + xlat;

     ret = vfio_container_query_dirty_bitmap(bcontainer, iova,
iotlb->addr_mask + 1,
@@ -1228,7 +1247,12 @@ static void
vfio_listener_log_sync(MemoryListener *listener,
     int ret;
     Error *local_err = NULL;

-    if (vfio_listener_skipped_section(section)) {
+    /*
+     * Bypass readonly section as it never becomes dirty, iommu memory
section
+     * is RW and never bypassed. The readonly mappings in iommu MR are
bypassed
+     * in vfio_iommu_map_dirty_notify().
+     */
+    if (vfio_listener_skipped_section(section, true)) {
         return;
     }


Looks the vfio_iommu_map_notify() is missed. It also has unmap op. is
it?

diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 3c62bab764..180e3d526b 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -103,6 +103,7 @@ vfio_listener_region_del(uint64_t start, uint64_t end)
"region_del 0x%"PRIx64" -
vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min,
uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" -
0x%"PRIx64"]"
vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t
max32, uint64_t min64, uint64_t max64, uint64_t minpci, uint64_t maxpci)
"nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" -
0x%"PRIx64"], pci64:[0x%"PRIx64" - 0x%"PRIx64"]"
vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end)
"iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
+vfio_iommu_map_dirty_notify_skip_ro(uint64_t iova_start, uint64_t
iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64

# container.c
vfio_container_query_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t
backend_flag, uint64_t bitmap_size, uint64_t translated_addr, uint64_t
dirty_pages) "iova=0x%"PRIx64" size=0x%"PRIx64"
backend_flag=0x%"PRIx64" bitmap_size=0x%"PRIx64" gpa=0x%"PRIx64"
dirty_pages=%"PRIu64
--
2.47.1


Reply via email to