On 23/09/2025 03:50, Duan, Zhenzhong wrote:
>> -----Original Message-----
>> From: Joao Martins <[email protected]>
>> Subject: Re: [PATCH 3/5] vfio/iommufd: Add
>> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR flag support
>>
>> On 22/09/2025 17:06, Joao Martins wrote:
>>> On 22/09/2025 17:02, Cédric Le Goater wrote:
>>>> On 9/22/25 07:49, Duan, Zhenzhong wrote:
>>>>> Hi Joao,
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Joao Martins <[email protected]>
>>>>>> Subject: Re: [PATCH 3/5] vfio/iommufd: Add
>>>>>> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR flag support
>>>>>>
>>>>>> On 10/09/2025 03:36, Zhenzhong Duan wrote:
>>>>>>> Pass IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR when doing the
>> last
>>>>>> dirty
>>>>>>> bitmap query right before unmap, no PTEs flushes. This accelerates the
>>>>>>> query without issue because unmap will tear down the mapping
>> anyway.
>>>>>>>
>>>>>>> Add a new element dirty_tracking_flags in VFIOIOMMUFDContainer to
>>>>>>> be used for the flags of iommufd dirty tracking. Currently it is
>>>>>>> set to either IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR or 0
>> based on
>>>>>>> the scenario.
>>>>>>>
>>>>>>> Co-developed-by: Joao Martins <[email protected]>
>>>>>>> Signed-off-by: Joao Martins <[email protected]>
>>>>>>> Signed-off-by: Zhenzhong Duan <[email protected]>
>>>>>>> Tested-by: Xudong Hao <[email protected]>
>>>>>>> Tested-by: Giovannio Cabiddu <[email protected]>
>>>>>>> ---
>>>>>>> hw/vfio/vfio-iommufd.h | 1 +
>>>>>>> include/system/iommufd.h | 2 +-
>>>>>>> backends/iommufd.c | 5 +++--
>>>>>>> hw/vfio/iommufd.c | 6 +++++-
>>>>>>> backends/trace-events | 2 +-
>>>>>>> 5 files changed, 11 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/hw/vfio/vfio-iommufd.h b/hw/vfio/vfio-iommufd.h
>>>>>>> index 07ea0f4304..e0af241c75 100644
>>>>>>> --- a/hw/vfio/vfio-iommufd.h
>>>>>>> +++ b/hw/vfio/vfio-iommufd.h
>>>>>>> @@ -26,6 +26,7 @@ typedef struct VFIOIOMMUFDContainer {
>>>>>>> VFIOContainerBase bcontainer;
>>>>>>> IOMMUFDBackend *be;
>>>>>>> uint32_t ioas_id;
>>>>>>> + uint64_t dirty_tracking_flags;
>>>>>>> QLIST_HEAD(, VFIOIOASHwpt) hwpt_list;
>>>>>>> } VFIOIOMMUFDContainer;
>>>>>>>
>>>>>>> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
>>>>>>> index c9c72ffc45..63898e7b0d 100644
>>>>>>> --- a/include/system/iommufd.h
>>>>>>> +++ b/include/system/iommufd.h
>>>>>>> @@ -64,7 +64,7 @@ bool
>>>>>> iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t
>>>>>> hwpt_id,
>>>>>>> bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be,
>>>>>> uint32_t hwpt_id,
>>>>>>> uint64_t
>> iova, ram_addr_t
>>>>>> size,
>>>>>>> uint64_t
>> page_size,
>>>>>> uint64_t *data,
>>>>>>> - Error
>> **errp);
>>>>>>> + uint64_t
>> flags, Error
>>>>>> **errp);
>>>>>>> bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be,
>>>>>> uint32_t id,
>>>>>>> uint32_t
>> data_type,
>>>>>> uint32_t entry_len,
>>>>>>> uint32_t
>> *entry_num, void
>>>>>> *data,
>>>>>>> diff --git a/backends/iommufd.c b/backends/iommufd.c
>>>>>>> index 2a33c7ab0b..3c4f6157e2 100644
>>>>>>> --- a/backends/iommufd.c
>>>>>>> +++ b/backends/iommufd.c
>>>>>>> @@ -361,7 +361,7 @@ bool
>>>>>> iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be,
>>>>>>> uint32_t
>> hwpt_id,
>>>>>>> uint64_t
>> iova, ram_addr_t
>>>>>> size,
>>>>>>> uint64_t
>> page_size,
>>>>>> uint64_t *data,
>>>>>>> - Error **errp)
>>>>>>> + uint64_t
>> flags, Error **errp)
>>>>>>> {
>>>>>>> int ret;
>>>>>>> struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
>>>>>>> @@ -371,11 +371,12 @@ bool
>>>>>> iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be,
>>>>>>> .length = size,
>>>>>>> .page_size = page_size,
>>>>>>> .data = (uintptr_t)data,
>>>>>>> + .flags = flags,
>>>>>>> };
>>>>>>>
>>>>>>> ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP,
>>>>>> &get_dirty_bitmap);
>>>>>>> trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id,
>> iova,
>>>>>> size,
>>>>>>>
>> - page_size,
>> ret ? errno :
>>>>>> 0);
>>>>>>> + flags,
>> page_size, ret ?
>>>>>> errno : 0);
>>>>>>> if (ret) {
>>>>>>> error_setg_errno(errp, errno,
>>>>>>> "IOMMU_HWPT_GET_DIRTY_
>> BITMAP
>>>>>> (iova: 0x%"HWADDR_PRIx
>>>>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>>>>> index 0057488ce9..c897aa6b17 100644
>>>>>>> --- a/hw/vfio/iommufd.c
>>>>>>> +++ b/hw/vfio/iommufd.c
>>>>>>> @@ -62,7 +62,7 @@ static int iommufd_cdev_unmap_one(const
>>>>>> VFIOContainerBase *bcontainer,
>>>>>>> hwaddr iova,
>> ram_addr_t size,
>>>>>>> IOMMUTLBEntr
>> y *iotlb)
>>>>>>> {
>>>>>>> - const VFIOIOMMUFDContainer *container =
>>>>>>> + VFIOIOMMUFDContainer *container =
>>>>>>> container_of(bcontainer, VFIOIOMMUFDContainer,
>>>>>> bcontainer);
>>>>>>> bool need_dirty_sync = false;
>>>>>>> Error *local_err = NULL;
>>>>>>> @@ -73,9 +73,12 @@ static int iommufd_cdev_unmap_one(const
>>>>>> VFIOContainerBase *bcontainer,
>>>>>>> if (iotlb &&
>> vfio_container_dirty_tracking_is_started(bcontainer)) {
>>>>>>> if
>>>>>> (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
>>>>>>> bcontainer->dirty_pages_supported) {
>>>>>>> + container->dirty_tracking_flags =
>>>>>>> +
>>>>>> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR;
>>>>>>> ret =
>> vfio_container_query_dirty_bitmap(bcontainer, iova,
>>>>>> size,
>>>>>>>
>>>>>> iotlb->translated_addr,
>>>>>>>
>>>>>> &local_err);
>>>>>>> + container->dirty_tracking_flags = 0;
>>>>>>
>>>>>> Why not changing vfio_container_query_dirty_bitmap to pass a flags
>> too, like
>>>>>> the
>>>>>> original patches? This is a little unnecssary odd style to pass a flag
>>>>>> via
>>>>>> container structure rather and then clearing.
>>>>>
>>>>> Just want to be simpler, original patch introduced a new parameter to
>> almost all
>>>>> variants of *_query_dirty_bitmap() while the flags parameter is only used
>> by
>>>>> IOMMUFD backend when doing unmap_bitmap. Currently we already
>> have three
>>>>> backends, legacy VFIO, IOMMUFD and VFIO-user, only IOMMUFD need
>> the flag.
>>>>>
>>>>> I take container->dirty_tracking_flags as a notification mechanism, so set
>> it
>>>>> before
>>>>> vfio_container_query_dirty_bitmap() and clear it thereafter. Maybe
>> clearing it in
>>>>> iommufd_query_dirty_bitmap() is easier to be acceptable?
>>>>>
>>>>>>
>>>>>> Part of the reason the original series had a VFIO_GET_DIRTY_NO_FLUSH
>> for
>>>>>> generic
>>>>>> container abstraction was to not mix IOMMUFD UAPI specifics into base
>>>>>> container
>>>>>> API. Then in getting a VFIO_GET_DIRTY_NO_FLUSH, then type1 backend
>>>>>> could just
>>>>>> ignore the flag, while IOMMUFD translates it to
>>>>>> IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR
>>>>>
>>>>> I did port original patch https://github.com/yiliu1765/qemu/
>>>>> commit/99f83595d79d2e4170c9e456cf1a7b9521bd4f80
>>>>> But it looks complex to have 'flags' parameter everywhere.
>>>> I think I would prefer like Joao to avoid caching information if possible
>>>> but I haven't check closely the mess it would introduce in the code. Let
>>>> me check.
>>>>
>>>
>>> My recollection was that it wasn't that much churn added as this series is
>>> already doing the most of the churn. But I am checking how the code would
>> look
>>> like to properly respond to his suggestion on why he changing it towards
>>> structuref field.
>>
>> The churn should be similar to this patch:
>>
>> https://github.com/jpemartins/qemu/commit/5e1f11075299a5fa9564a26788
>> dc9cc1717e297c
>>
>> It's mostly an interface parameter addition of flags.
>
> Yes, it's a general interface parameter but it's only used by IOMMUFD.
> That's my only argument.
>
You could use that argument for anything that's not common and unique to each
backend. type1 dirty tracking is effectively "frozen" in UAPI (IIRC) so I won't
expect flags. There's device dirty tracking, IOMMUFD dirty tracking and
vfio-user dirty tracking. While I am not anticipating any new thing here (at
least in the needs I have ahead, can't speak for other companies/contributors);
but you could see the flags as future proofing it.
The ugly part of the alternative is that it sort of limits the interface to be
single-user only as nobody call into IOMMUFD with the dirty_tracking_flags if
it's called concurrently for some other purpose than unmap. We sort of use the
container data structure as a argument storage for a single call as opposed to
store some intermediary state.
I mean I agree with you that there's churn, but it's essentially a argument
change that's only churn-y because there's 3 users.
>> But there's now a few more callsites and I suppose that's the extra churn.
>> But
>> I don't think
>> vfio-user would need to change. See snip below.
>
> vfio-user need same change as vfio-legacy though they don't use 'flags'
> parameter,
> or else there is build error:
>
> ../hw/vfio-user/container.c:340:30: error: assignment to ‘int (*)(const
> VFIOContainerBase *, VFIOBitmap *, hwaddr, hwaddr, uint64_t, Error **)’
> {aka ‘int (*)(const VFIOContainerBase *, VFIOBitmap *, long unsigned int,
> long unsigned int, long unsigned int, Error **)’} from incompatible pointer
> type ‘int (*)(const VFIOContainerBase *, VFIOBitmap *, hwaddr, hwaddr,
> Error **)’ {aka ‘int (*)(const VFIOContainerBase *, VFIOBitmap *, long
> unsigned int, long unsigned int, Error **)’}
> [-Werror=incompatible-pointer-types]
> 340 | vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
>
yes -- I didn't have VFIO_USER compiled in probably why I didn't notice it.
>>
>> I've pushed this here
>> https://github.com/jpemartins/qemu/commits/relax-viommu-lm but showing
>> the
>> series wide changes for discussion. The thing I didn't do was have an
>> intermediate 'backend
>> agnostic' flag thingie like the original one
>> (https://github.com/jpemartins/qemu/commit/1ef8abc896ae69be8896a6870
>> 5fe4a87204709e0) with
>> VFIO_GET_DIRTY_NO_FLUSH
>>
Instead of a flags, an alternative would be to have a 'bool unmap' which then
IOMMUFD can translate to IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR.
That's a bit cleaner as to avoid leaking backend UAPIs into the API but it
limits to 'unmap' case.
>> diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
>> index 56304978e1e8..2fe08e010405 100644
>> --- a/hw/vfio/container-base.c
>> +++ b/hw/vfio/container-base.c
>> @@ -211,17 +211,16 @@ static int
>> vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
>> }
>>
>> static int vfio_container_iommu_query_dirty_bitmap(const
>> VFIOContainerBase *bcontainer,
>> - VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>> **errp)
>> + VFIOBitmap *vbmap, hwaddr iova, hwaddr size,
>> uint64_t flags, Error **errp)
>> {
>> VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
>>
>> g_assert(vioc->query_dirty_bitmap);
>> - return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size,
>> - errp);
>> + return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size, flags,
>> errp);
>> }
>>
>> static int vfio_container_devices_query_dirty_bitmap(const
>> VFIOContainerBase *bcontainer,
>> - VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error
>> **errp)
>> + VFIOBitmap *vbmap, hwaddr iova, hwaddr size,
>> uint64_t flags, Error **errp)
>
> I think we can drop 'flags' here as you did in your old patch?
>
Yeah
>> {
>> VFIODevice *vbasedev;
>> int ret;
>> @@ -243,7 +242,7 @@ static int
>> vfio_container_devices_query_dirty_bitmap(const VFIOContainerBase *bc
>> }
>>
>> int vfio_container_query_dirty_bitmap(const VFIOContainerBase
>> *bcontainer, uint64_t iova,
>> - uint64_t size, ram_addr_t ram_addr, Error
>> **errp)
>> + uint64_t size, uint64_t flags, ram_addr_t
>> ram_addr, Error **errp)
>> {
>> bool all_device_dirty_tracking =
>> vfio_container_devices_dirty_tracking_is_supported(bcontainer);
>> @@ -267,10 +266,10 @@ int vfio_container_query_dirty_bitmap(const
>> VFIOContainerBase *bcontainer, uint6
>>
>> if (all_device_dirty_tracking) {
>> ret = vfio_container_devices_query_dirty_bitmap(bcontainer,
>> &vbmap, iova, size,
>> - errp);
>> + flags,
>> errp);
>
> Same here.
/me nods