Hi Yi, Cedric, Could you also help comment on this patch? This is a pure VFIO migration related optimization, I think it's better to let it go with the "vfio: relax the vIOMMU check" series. I'd like to move it in next respin of "vfio: relax the vIOMMU check" series if you think it make sense.
Thanks Zhenzhong >-----Original Message----- >From: Duan, Zhenzhong <[email protected]> >Subject: [PATCH v8 17/23] vfio/listener: Bypass readonly region for dirty >tracking > >When doing ditry tracking or calculating dirty tracking range, readonly >regions can be bypassed, because corresponding DMA mappings are readonly >and never become dirty. > >This can optimize dirty tracking a bit for passthrough device. > >Signed-off-by: Zhenzhong Duan <[email protected]> >--- > hw/vfio/listener.c | 46 +++++++++++++++++++++++++++++++++----------- > hw/vfio/trace-events | 1 + > 2 files changed, 36 insertions(+), 11 deletions(-) > >diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c >index 3b48f6796c..ca2377d860 100644 >--- a/hw/vfio/listener.c >+++ b/hw/vfio/listener.c >@@ -76,8 +76,13 @@ static bool vfio_log_sync_needed(const VFIOContainer >*bcontainer) > return true; > } > >-static bool vfio_listener_skipped_section(MemoryRegionSection *section) >+static bool vfio_listener_skipped_section(MemoryRegionSection *section, >+ bool bypass_ro) > { >+ if (bypass_ro && section->readonly) { >+ return true; >+ } >+ > return (!memory_region_is_ram(section->mr) && > !memory_region_is_iommu(section->mr)) || > memory_region_is_protected(section->mr) || >@@ -368,9 +373,9 @@ static bool >vfio_known_safe_misalignment(MemoryRegionSection *section) > } > > static bool vfio_listener_valid_section(MemoryRegionSection *section, >- const char *name) >+ bool bypass_ro, const char >*name) > { >- if (vfio_listener_skipped_section(section)) { >+ if (vfio_listener_skipped_section(section, bypass_ro)) { > trace_vfio_listener_region_skip(name, > section->offset_within_address_space, > section->offset_within_address_space + >@@ -497,7 +502,7 @@ void vfio_container_region_add(VFIOContainer >*bcontainer, > int ret; > Error *err = NULL; > >- if (!vfio_listener_valid_section(section, "region_add")) { >+ if (!vfio_listener_valid_section(section, false, "region_add")) { > return; > } > >@@ -663,7 +668,7 @@ static void vfio_listener_region_del(MemoryListener >*listener, > int ret; > bool try_unmap = true; > >- if (!vfio_listener_valid_section(section, "region_del")) { >+ if (!vfio_listener_valid_section(section, false, "region_del")) { > return; > } > >@@ -722,11 +727,11 @@ static void >vfio_listener_region_del(MemoryListener *listener, > } > > /* >- * Fake an IOTLB entry for identity mapping which is needed by >dirty >- * tracking. In fact, in unmap_bitmap, only translated_addr field is >- * used to set dirty bitmap. >+ * Fake an IOTLB entry for writable identity mapping which is >needed >+ * by dirty tracking. In fact, in unmap_bitmap, only >translated_addr >+ * field is used to set dirty bitmap. > */ >- if (!memory_region_is_iommu(section->mr)) { >+ if (!memory_region_is_iommu(section->mr) >&& !section->readonly) { > entry.iova = iova; > entry.translated_addr = iova; > iotlb = &entry; >@@ -834,7 +839,8 @@ static void >vfio_dirty_tracking_update(MemoryListener *listener, > container_of(listener, VFIODirtyRangesListener, listener); > hwaddr iova, end; > >- if (!vfio_listener_valid_section(section, "tracking_update") || >+ /* Bypass readonly section as it never becomes dirty */ >+ if (!vfio_listener_valid_section(section, true, "tracking_update") || > !vfio_get_section_iova_range(dirty->bcontainer, section, > &iova, &end, NULL)) { > return; >@@ -1093,6 +1099,19 @@ static void >vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) > if (!mr) { > goto out_unlock; > } >+ >+ /* >+ * The mapping is readonly when either it's a readonly mapping in guest >+ * or mapped target is readonly, bypass it for dirty tracking as it >+ * never becomes dirty. >+ */ >+ if (!(iotlb->perm & IOMMU_WO) || mr->readonly) { >+ trace_vfio_iommu_map_dirty_notify_skip_ro(iova, >+ iova + >iotlb->addr_mask); >+ rcu_read_unlock(); >+ return; >+ } >+ > translated_addr = memory_region_get_ram_addr(mr) + xlat; > > ret = vfio_container_query_dirty_bitmap(bcontainer, iova, >iotlb->addr_mask + 1, >@@ -1228,7 +1247,12 @@ static void >vfio_listener_log_sync(MemoryListener *listener, > int ret; > Error *local_err = NULL; > >- if (vfio_listener_skipped_section(section)) { >+ /* >+ * Bypass readonly section as it never becomes dirty, iommu memory >section >+ * is RW and never bypassed. The readonly mappings in iommu MR are >bypassed >+ * in vfio_iommu_map_dirty_notify(). >+ */ >+ if (vfio_listener_skipped_section(section, true)) { > return; > } > >diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >index 3c62bab764..180e3d526b 100644 >--- a/hw/vfio/trace-events >+++ b/hw/vfio/trace-events >@@ -103,6 +103,7 @@ vfio_listener_region_del(uint64_t start, uint64_t end) >"region_del 0x%"PRIx64" - > vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, >uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - >0x%"PRIx64"]" > vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t >max32, uint64_t min64, uint64_t max64, uint64_t minpci, uint64_t maxpci) >"nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" - >0x%"PRIx64"], pci64:[0x%"PRIx64" - 0x%"PRIx64"]" > vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) >"iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 >+vfio_iommu_map_dirty_notify_skip_ro(uint64_t iova_start, uint64_t >iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 > > # container.c > vfio_container_query_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t >backend_flag, uint64_t bitmap_size, uint64_t translated_addr, uint64_t >dirty_pages) "iova=0x%"PRIx64" size=0x%"PRIx64" >backend_flag=0x%"PRIx64" bitmap_size=0x%"PRIx64" gpa=0x%"PRIx64" >dirty_pages=%"PRIu64 >-- >2.47.1
