On Wed, 22 Feb 2023 19:49:05 +0200 Avihai Horon <[email protected]> wrote:
> From: Joao Martins <[email protected]>
> 
> According to the device DMA logging uAPI, IOVA ranges to be logged by
> the device must be provided all at once upon DMA logging start.
> 
> As preparation for the following patches which will add device dirty
> page tracking, keep a record of all DMA mapped IOVA ranges so later they
> can be used for DMA logging start.
> 
> Note that when vIOMMU is enabled DMA mapped IOVA ranges are not tracked.
> This is due to the dynamic nature of vIOMMU DMA mapping/unmapping.
> Following patches will address the vIOMMU case specifically.
> 
> Signed-off-by: Joao Martins <[email protected]>
> Signed-off-by: Avihai Horon <[email protected]>
> ---
>  include/hw/vfio/vfio-common.h |  3 ++
>  hw/vfio/common.c              | 86 +++++++++++++++++++++++++++++++++--
>  2 files changed, 86 insertions(+), 3 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index ee55d442b4..6f36876ce0 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -23,6 +23,7 @@
>  
>  #include "exec/memory.h"
>  #include "qemu/queue.h"
> +#include "qemu/iova-tree.h"
>  #include "qemu/notify.h"
>  #include "ui/console.h"
>  #include "hw/display/ramfb.h"
> @@ -92,6 +93,8 @@ typedef struct VFIOContainer {
>      uint64_t max_dirty_bitmap_size;
>      unsigned long pgsizes;
>      unsigned int dma_max_mappings;
> +    IOVATree *mappings;
> +    QemuMutex mappings_mutex;
>      QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
>      QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
>      QLIST_HEAD(, VFIOGroup) group_list;
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 84f08bdbbb..6041da6c7e 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -44,6 +44,7 @@
>  #include "migration/blocker.h"
>  #include "migration/qemu-file.h"
>  #include "sysemu/tpm.h"
> +#include "qemu/iova-tree.h"
>  
>  VFIOGroupList vfio_group_list =
>      QLIST_HEAD_INITIALIZER(vfio_group_list);
> @@ -426,6 +427,11 @@ void vfio_unblock_multiple_devices_migration(void)
>      multiple_devices_migration_blocker = NULL;
>  }
>  
> +static bool vfio_have_giommu(VFIOContainer *container)
> +{
> +    return !QLIST_EMPTY(&container->giommu_list);
> +}
> +
>  static void vfio_set_migration_error(int err)
>  {
>      MigrationState *ms = migrate_get_current();
> @@ -499,6 +505,51 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
>      return true;
>  }
>  
> +static int vfio_record_mapping(VFIOContainer *container, hwaddr iova,
> +                               hwaddr size, bool readonly)
> +{
> +    DMAMap map = {
> +        .iova = iova,
> +        .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */
> +        .perm = readonly ? IOMMU_RO : IOMMU_RW,
> +    };
> +    int ret;
> +
> +    if (vfio_have_giommu(container)) {
> +        return 0;
> +    }
> +
> +    WITH_QEMU_LOCK_GUARD(&container->mappings_mutex) {
> +        ret = iova_tree_insert(container->mappings, &map);
> +        if (ret) {
> +            if (ret == IOVA_ERR_INVALID) {
> +                ret = -EINVAL;
> +            } else if (ret == IOVA_ERR_OVERLAP) {
> +                ret = -EEXIST;
> +            }
> +        }
> +    }
> +
> +    return ret;
> +}
> +
> +static void vfio_erase_mapping(VFIOContainer *container, hwaddr iova,
> +                               hwaddr size)
> +{
> +    DMAMap map = {
> +        .iova = iova,
> +        .size = size - 1, /* IOVATree is inclusive, so subtract 1 from size */
> +    };
> +
> +    if (vfio_have_giommu(container)) {
> +        return;
> +    }
> +
> +    WITH_QEMU_LOCK_GUARD(&container->mappings_mutex) {
> +        iova_tree_remove(container->mappings, map);
> +    }
> +}

Nit, 'insert' and 'remove' to match the IOVATree semantics?
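
As an aside, if I follow the series, this tree only exists so that
logging start can later flatten it into the range array that
VFIO_DEVICE_FEATURE_DMA_LOGGING_START consumes.  Roughly like the
below, untested, where log_ranges, log_nr, and vfio_build_log_ranges
are my own made-up names, not something from a later patch:

/*
 * Sketch only: flatten the recorded tree into the uAPI range array.
 * The file-scope cursor is ugly, but iova_tree_foreach() passes no
 * opaque pointer to its callback.
 */
#include "qemu/osdep.h"
#include <linux/vfio.h>
#include "qemu/iova-tree.h"
#include "qemu/lockable.h"
#include "hw/vfio/vfio-common.h"

static struct vfio_device_feature_dma_logging_range *log_ranges;
static uint32_t log_nr;

static gboolean vfio_count_log_range(DMAMap *map)
{
    log_nr++;
    return false; /* false == keep walking the tree */
}

static gboolean vfio_fill_log_range(DMAMap *map)
{
    log_ranges[log_nr].iova = map->iova;
    log_ranges[log_nr].length = map->size + 1; /* DMAMap size is inclusive */
    log_nr++;
    return false;
}

static void vfio_build_log_ranges(VFIOContainer *container)
{
    WITH_QEMU_LOCK_GUARD(&container->mappings_mutex) {
        g_free(log_ranges); /* drop any stale array from a prior start */
        log_nr = 0;
        iova_tree_foreach(container->mappings, vfio_count_log_range);
        log_ranges = g_new0(struct vfio_device_feature_dma_logging_range,
                            log_nr);
        log_nr = 0;
        iova_tree_foreach(container->mappings, vfio_fill_log_range);
    }
}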
>  static int vfio_dma_unmap_bitmap(VFIOContainer *container,
>                                   hwaddr iova, ram_addr_t size,
>                                   IOMMUTLBEntry *iotlb)
> @@ -599,6 +650,8 @@ static int vfio_dma_unmap(VFIOContainer *container,
>                                              DIRTY_CLIENTS_NOCODE);
>      }
>  
> +    vfio_erase_mapping(container, iova, size);
> +
>      return 0;
>  }
>  
> @@ -612,6 +665,16 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
>          .iova = iova,
>          .size = size,
>      };
> +    int ret;
> +
> +    ret = vfio_record_mapping(container, iova, size, readonly);
> +    if (ret) {
> +        error_report("vfio: Failed to record mapping, iova: 0x%" HWADDR_PRIx
> +                     ", size: 0x" RAM_ADDR_FMT ", ret: %d (%s)",
> +                     iova, size, ret, strerror(-ret));
> +
> +        return ret;
> +    }

Is there no way to replay the mappings when a migration is started?
This seems like a horrible latency and bloat trade-off for the
possibility that the VM might migrate and the device might support
these features.  Our performance with vIOMMU is already terrible, I
can't help but believe this makes it worse.
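
To spell out what I mean by replay, an untested sketch with made-up
names (VFIODirtyRanges, vfio_collect_dirty_ranges): register a
short-lived MemoryListener when dirty tracking starts and let the
registration callbacks enumerate the sections that already exist,
rather than mirroring every map/unmap into a tree up front.  A real
version would also need the same section filtering we do in the map
path, ie. vfio_listener_skipped_section():

#include "qemu/osdep.h"
#include "exec/memory.h"
#include "hw/vfio/vfio-common.h"

typedef struct VFIODirtyRanges {
    MemoryListener listener;
    hwaddr min_iova;
    hwaddr max_iova;
} VFIODirtyRanges;

static void vfio_dirty_ranges_region_add(MemoryListener *listener,
                                         MemoryRegionSection *section)
{
    VFIODirtyRanges *range = container_of(listener, VFIODirtyRanges,
                                          listener);
    hwaddr iova = section->offset_within_address_space;
    hwaddr end = iova + int128_get64(section->size) - 1;

    range->min_iova = MIN(range->min_iova, iova);
    range->max_iova = MAX(range->max_iova, end);
}

static void vfio_collect_dirty_ranges(VFIOContainer *container,
                                      VFIODirtyRanges *range)
{
    range->min_iova = HWADDR_MAX;
    range->max_iova = 0;
    range->listener = (MemoryListener) {
        .name = "vfio-dirty-ranges",
        .region_add = vfio_dirty_ranges_region_add,
    };
    /* ->region_add fires synchronously for every existing section */
    memory_listener_register(&range->listener, container->space->as);
    /* all existing sections have been visited; drop the listener */
    memory_listener_unregister(&range->listener);
}

Thanks,

Alex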
