On 22/09/2025 17:06, Joao Martins wrote: > On 22/09/2025 17:02, Cédric Le Goater wrote: >> On 9/22/25 07:49, Duan, Zhenzhong wrote: >>> Hi Joao, >>> >>>> -----Original Message----- >>>> From: Joao Martins <[email protected]> >>>> Subject: Re: [PATCH 3/5] vfio/iommufd: Add >>>> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR flag support >>>> >>>> On 10/09/2025 03:36, Zhenzhong Duan wrote: >>>>> Pass IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR when doing the last >>>> dirty >>>>> bitmap query right before unmap, no PTEs flushes. This accelerates the >>>>> query without issue because unmap will tear down the mapping anyway. >>>>> >>>>> Add a new element dirty_tracking_flags in VFIOIOMMUFDContainer to >>>>> be used for the flags of iommufd dirty tracking. Currently it is >>>>> set to either IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR or 0 based on >>>>> the scenario. >>>>> >>>>> Co-developed-by: Joao Martins <[email protected]> >>>>> Signed-off-by: Joao Martins <[email protected]> >>>>> Signed-off-by: Zhenzhong Duan <[email protected]> >>>>> Tested-by: Xudong Hao <[email protected]> >>>>> Tested-by: Giovannio Cabiddu <[email protected]> >>>>> --- >>>>> hw/vfio/vfio-iommufd.h | 1 + >>>>> include/system/iommufd.h | 2 +- >>>>> backends/iommufd.c | 5 +++-- >>>>> hw/vfio/iommufd.c | 6 +++++- >>>>> backends/trace-events | 2 +- >>>>> 5 files changed, 11 insertions(+), 5 deletions(-) >>>>> >>>>> diff --git a/hw/vfio/vfio-iommufd.h b/hw/vfio/vfio-iommufd.h >>>>> index 07ea0f4304..e0af241c75 100644 >>>>> --- a/hw/vfio/vfio-iommufd.h >>>>> +++ b/hw/vfio/vfio-iommufd.h >>>>> @@ -26,6 +26,7 @@ typedef struct VFIOIOMMUFDContainer { >>>>> VFIOContainerBase bcontainer; >>>>> IOMMUFDBackend *be; >>>>> uint32_t ioas_id; >>>>> + uint64_t dirty_tracking_flags; >>>>> QLIST_HEAD(, VFIOIOASHwpt) hwpt_list; >>>>> } VFIOIOMMUFDContainer; >>>>> >>>>> diff --git a/include/system/iommufd.h b/include/system/iommufd.h >>>>> index c9c72ffc45..63898e7b0d 100644 >>>>> --- a/include/system/iommufd.h >>>>> +++ b/include/system/iommufd.h >>>>> @@ -64,7 +64,7 @@ bool >>>> iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t >>>> hwpt_id, >>>>> bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, >>>> uint32_t hwpt_id, >>>>> uint64_t iova, ram_addr_t >>>> size, >>>>> uint64_t page_size, >>>> uint64_t *data, >>>>> - Error **errp); >>>>> + uint64_t flags, Error >>>> **errp); >>>>> bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, >>>> uint32_t id, >>>>> uint32_t data_type, >>>> uint32_t entry_len, >>>>> uint32_t *entry_num, void >>>> *data, >>>>> diff --git a/backends/iommufd.c b/backends/iommufd.c >>>>> index 2a33c7ab0b..3c4f6157e2 100644 >>>>> --- a/backends/iommufd.c >>>>> +++ b/backends/iommufd.c >>>>> @@ -361,7 +361,7 @@ bool >>>> iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, >>>>> uint32_t hwpt_id, >>>>> uint64_t iova, ram_addr_t >>>> size, >>>>> uint64_t page_size, >>>> uint64_t *data, >>>>> - Error **errp) >>>>> + uint64_t flags, Error **errp) >>>>> { >>>>> int ret; >>>>> struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = { >>>>> @@ -371,11 +371,12 @@ bool >>>> iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, >>>>> .length = size, >>>>> .page_size = page_size, >>>>> .data = (uintptr_t)data, >>>>> + .flags = flags, >>>>> }; >>>>> >>>>> ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, >>>> &get_dirty_bitmap); >>>>> trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, >>>> size, >>>>> - page_size, ret ? errno : >>>> 0); >>>>> + flags, page_size, ret ? >>>> errno : 0); >>>>> if (ret) { >>>>> error_setg_errno(errp, errno, >>>>> "IOMMU_HWPT_GET_DIRTY_BITMAP >>>> (iova: 0x%"HWADDR_PRIx >>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c >>>>> index 0057488ce9..c897aa6b17 100644 >>>>> --- a/hw/vfio/iommufd.c >>>>> +++ b/hw/vfio/iommufd.c >>>>> @@ -62,7 +62,7 @@ static int iommufd_cdev_unmap_one(const >>>> VFIOContainerBase *bcontainer, >>>>> hwaddr iova, ram_addr_t size, >>>>> IOMMUTLBEntry *iotlb) >>>>> { >>>>> - const VFIOIOMMUFDContainer *container = >>>>> + VFIOIOMMUFDContainer *container = >>>>> container_of(bcontainer, VFIOIOMMUFDContainer, >>>> bcontainer); >>>>> bool need_dirty_sync = false; >>>>> Error *local_err = NULL; >>>>> @@ -73,9 +73,12 @@ static int iommufd_cdev_unmap_one(const >>>> VFIOContainerBase *bcontainer, >>>>> if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) { >>>>> if >>>> (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) && >>>>> bcontainer->dirty_pages_supported) { >>>>> + container->dirty_tracking_flags = >>>>> + >>>> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR; >>>>> ret = vfio_container_query_dirty_bitmap(bcontainer, iova, >>>> size, >>>>> >>>> iotlb->translated_addr, >>>>> >>>> &local_err); >>>>> + container->dirty_tracking_flags = 0; >>>> >>>> Why not changing vfio_container_query_dirty_bitmap to pass a flags too, >>>> like >>>> the >>>> original patches? This is a little unnecssary odd style to pass a flag via >>>> container structure rather and then clearing. >>> >>> Just want to be simpler, original patch introduced a new parameter to >>> almost all >>> variants of *_query_dirty_bitmap() while the flags parameter is only used by >>> IOMMUFD backend when doing unmap_bitmap. Currently we already have three >>> backends, legacy VFIO, IOMMUFD and VFIO-user, only IOMMUFD need the flag. >>> >>> I take container->dirty_tracking_flags as a notification mechanism, so set >>> it >>> before >>> vfio_container_query_dirty_bitmap() and clear it thereafter. Maybe clearing >>> it in >>> iommufd_query_dirty_bitmap() is easier to be acceptable? >>> >>>> >>>> Part of the reason the original series had a VFIO_GET_DIRTY_NO_FLUSH for >>>> generic >>>> container abstraction was to not mix IOMMUFD UAPI specifics into base >>>> container >>>> API. Then in getting a VFIO_GET_DIRTY_NO_FLUSH, then type1 backend >>>> could just >>>> ignore the flag, while IOMMUFD translates it to >>>> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR >>> >>> I did port original patch https://github.com/yiliu1765/qemu/ >>> commit/99f83595d79d2e4170c9e456cf1a7b9521bd4f80 >>> But it looks complex to have 'flags' parameter everywhere. >> I think I would prefer like Joao to avoid caching information if possible >> but I haven't check closely the mess it would introduce in the code. Let >> me check. >> > > My recollection was that it wasn't that much churn added as this series is > already doing the most of the churn. But I am checking how the code would look > like to properly respond to his suggestion on why he changing it towards > structuref field.
The churn should be similar to this patch: https://github.com/jpemartins/qemu/commit/5e1f11075299a5fa9564a26788dc9cc1717e297c It's mostly an interface parameter addition of flags. But there's now a few more callsites and I suppose that's the extra churn. But I don't think vfio-user would need to change. See snip below. I've pushed this here https://github.com/jpemartins/qemu/commits/relax-viommu-lm but showing the series wide changes for discussion. The thing I didn't do was have an intermediate 'backend agnostic' flag thingie like the original one (https://github.com/jpemartins/qemu/commit/1ef8abc896ae69be8896a68705fe4a87204709e0) with VFIO_GET_DIRTY_NO_FLUSH diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 56304978e1e8..2fe08e010405 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -211,17 +211,16 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, } static int vfio_container_iommu_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) + VFIOBitmap *vbmap, hwaddr iova, hwaddr size, uint64_t flags, Error **errp) { VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); g_assert(vioc->query_dirty_bitmap); - return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size, - errp); + return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size, flags, errp); } static int vfio_container_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) + VFIOBitmap *vbmap, hwaddr iova, hwaddr size, uint64_t flags, Error **errp) { VFIODevice *vbasedev; int ret; @@ -243,7 +242,7 @@ static int vfio_container_devices_query_dirty_bitmap(const VFIOContainerBase *bc } int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, - uint64_t size, ram_addr_t ram_addr, Error **errp) + uint64_t size, uint64_t flags, ram_addr_t ram_addr, Error **errp) { bool all_device_dirty_tracking = vfio_container_devices_dirty_tracking_is_supported(bcontainer); @@ -267,10 +266,10 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, uint6 if (all_device_dirty_tracking) { ret = vfio_container_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size, - errp); + flags, errp); } else { ret = vfio_container_iommu_query_dirty_bitmap(bcontainer, &vbmap, iova, size, - errp); + flags, errp); } if (ret) { @@ -280,7 +279,7 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, uint6 dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr, vbmap.pages); - trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size, ram_addr, + trace_vfio_container_query_dirty_bitmap(iova, size, flags, vbmap.size, ram_addr, dirty_pages); out: g_free(vbmap.bitmap); diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 030c6d3f89cf..86c4d06298f7 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -169,7 +169,7 @@ static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer, } if (need_dirty_sync) { - ret = vfio_container_query_dirty_bitmap(bcontainer, iova, size, + ret = vfio_container_query_dirty_bitmap(bcontainer, iova, size, 0, iotlb->translated_addr, &local_err); if (ret) { error_report_err(local_err); @@ -267,7 +267,8 @@ vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, } static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) + VFIOBitmap *vbmap, hwaddr iova, hwaddr size, uint64_t flags, + Error **errp) { const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); struct vfio_iommu_type1_dirty_bitmap *dbitmap; diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index b82597de9116..c1c8755c6d7c 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -73,12 +73,10 @@ static int iommufd_cdev_unmap_one(const VFIOContainerBase *bcontainer, if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) { if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) && bcontainer->dirty_pages_supported) { - container->dirty_tracking_flags = - IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR; ret = vfio_container_query_dirty_bitmap(bcontainer, iova, size, + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, iotlb->translated_addr, &local_err); - container->dirty_tracking_flags = 0; if (ret) { error_report_err(local_err); } @@ -95,7 +93,7 @@ static int iommufd_cdev_unmap_one(const VFIOContainerBase *bcontainer, } if (need_dirty_sync) { - ret = vfio_container_query_dirty_bitmap(bcontainer, iova, size, + ret = vfio_container_query_dirty_bitmap(bcontainer, iova, size, 0, iotlb->translated_addr, &local_err); if (ret) { @@ -235,7 +233,7 @@ err: static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, - hwaddr size, Error **errp) + hwaddr size, uint64_t flags, Error **errp) { VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, @@ -251,8 +249,7 @@ static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer, if (!iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id, iova, size, page_size, (uint64_t *)vbmap->bitmap, - container->dirty_tracking_flags, - errp)) { + flags, errp)) { return -EINVAL; } } diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c index e09383316598..27cbb45282ae 100644 --- a/hw/vfio/listener.c +++ b/hw/vfio/listener.c @@ -1082,7 +1082,7 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) translated_addr = memory_region_get_ram_addr(mr) + xlat; ret = vfio_container_query_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1, - translated_addr, &local_err); + 0, translated_addr, &local_err); if (ret) { error_prepend(&local_err, "vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " @@ -1118,7 +1118,7 @@ static int vfio_ram_discard_query_dirty_bitmap(MemoryRegionSection *section, * Sync the whole mapped region (spanning multiple individual mappings) * in one go. */ - ret = vfio_container_query_dirty_bitmap(vrdl->bcontainer, iova, size, ram_addr, + ret = vfio_container_query_dirty_bitmap(vrdl->bcontainer, iova, size, 0, ram_addr, &local_err); if (ret) { error_report_err(local_err); @@ -1203,7 +1203,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer, return vfio_container_query_dirty_bitmap(bcontainer, REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), - int128_get64(section->size), ram_addr, errp); + int128_get64(section->size), 0, ram_addr, errp); } static void vfio_listener_log_sync(MemoryListener *listener, diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index e3d571f8c845..ee18dafdb2ae 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -105,7 +105,7 @@ vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t max32, vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 # container-base.c -vfio_container_query_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) " iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 +vfio_container_query_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t flags, uint64_t bitmap_size, uint64_t start, uint64_ t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" flags= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%" PRIu64 # container.c vfio_container_disconnect(int fd) "close container->fd=%d" diff --git a/hw/vfio/vfio-iommufd.h b/hw/vfio/vfio-iommufd.h index e0af241c75cf..07ea0f430496 100644 --- a/hw/vfio/vfio-iommufd.h +++ b/hw/vfio/vfio-iommufd.h @@ -26,7 +26,6 @@ typedef struct VFIOIOMMUFDContainer { VFIOContainerBase bcontainer; IOMMUFDBackend *be; uint32_t ioas_id; - uint64_t dirty_tracking_flags; QLIST_HEAD(, VFIOIOASHwpt) hwpt_list; } VFIOIOMMUFDContainer; diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index acbd48a18a3a..e88b690cf423 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -98,7 +98,7 @@ bool vfio_container_dirty_tracking_is_started( bool vfio_container_devices_dirty_tracking_is_supported( const VFIOContainerBase *bcontainer); int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - uint64_t iova, uint64_t size, ram_addr_t ram_addr, Error **errp); + uint64_t iova, uint64_t size, uint64_t flags, ram_addr_t ram_addr, Error **errp); GList *vfio_container_get_iova_ranges(const VFIOContainerBase *bcontainer); @@ -252,12 +252,14 @@ struct VFIOIOMMUClass { * @vbmap: #VFIOBitmap internal bitmap structure * @iova: iova base address * @size: size of iova range + * @flags: flags to the dirty tracking query * @errp: pointer to Error*, to store an error if it happens. * * Returns zero to indicate success and negative for error. */ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp); + VFIOBitmap *vbmap, hwaddr iova, hwaddr size, uint64_t flags, + Error **errp); /* PCI specific */ int (*pci_hot_reset)(VFIODevice *vbasedev, bool single);
