From: John Johnson <john.g.john...@oracle.com> Signed-off-by: John G Johnson <john.g.john...@oracle.com> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> --- hw/vfio/user-protocol.h | 18 +++++++++++++++++ hw/vfio/user.h | 3 +++ hw/vfio/common.c | 23 ++++++++++++++++----- hw/vfio/migration.c | 34 +++++++++++++++++-------------- hw/vfio/pci.c | 12 +++++++++++ hw/vfio/user.c | 45 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 115 insertions(+), 20 deletions(-)
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index c5d9473f8f..bad067a570 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -182,6 +182,10 @@ typedef struct { char data[]; } VFIOUserDMARW; +/* + * VFIO_USER_DIRTY_PAGES + */ + /*imported from struct vfio_bitmap */ typedef struct { uint64_t pgsize; @@ -189,4 +193,18 @@ typedef struct { char data[]; } VFIOUserBitmap; +/* imported from struct vfio_iommu_type1_dirty_bitmap_get */ +typedef struct { + uint64_t iova; + uint64_t size; + VFIOUserBitmap bitmap; +} VFIOUserBitmapRange; + +/* imported from struct vfio_iommu_type1_dirty_bitmap */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; +} VFIOUserDirtyPages; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio/user.h b/hw/vfio/user.h index 5d4d0a43ba..905e0ee28d 100644 --- a/hw/vfio/user.h +++ b/hw/vfio/user.h @@ -87,6 +87,9 @@ int vfio_user_region_read(VFIODevice *vbasedev, uint32_t index, uint64_t offset, int vfio_user_region_write(VFIODevice *vbasedev, uint32_t index, uint64_t offset, uint32_t count, void *data); void vfio_user_reset(VFIODevice *vbasedev); +int vfio_user_dirty_bitmap(VFIOProxy *proxy, + struct vfio_iommu_type1_dirty_bitmap *bitmap, + struct vfio_iommu_type1_dirty_bitmap_get *range); void vfio_user_drain_reqs(VFIOProxy *proxy); #endif /* VFIO_USER_H */ diff --git a/hw/vfio/common.c b/hw/vfio/common.c index a532e52bcf..09d0147df2 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1303,10 +1303,19 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; } - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); - if (ret) { - error_report("Failed to set dirty tracking flag 0x%x errno: %d", - dirty.flags, errno); + if (container->proxy != NULL) { + ret = vfio_user_dirty_bitmap(container->proxy, &dirty, NULL); + if (ret) { + error_report("Failed to set dirty tracking flag 0x%x errno: %d", + dirty.flags, -ret); + } + } else { + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); + if (ret) { + error_report("Failed to set dirty tracking flag 0x%x errno: %d", + dirty.flags, errno); + ret = -errno; + } } } @@ -1356,7 +1365,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, goto err_out; } - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); + if (container->proxy != NULL) { + ret = vfio_user_dirty_bitmap(container->proxy, dbitmap, range); + } else { + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); + } if (ret) { error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64 " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova, diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 82f654afb6..89926a3b01 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -27,6 +27,7 @@ #include "pci.h" #include "trace.h" #include "hw/hw.h" +#include "user.h" /* * Flags to be used as unique delimiters for VFIO devices in the migration @@ -49,10 +50,18 @@ static int64_t bytes_transferred; static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, off_t off, bool iswrite) { + VFIORegion *region = &vbasedev->migration->region; int ret; - ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : - pread(vbasedev->fd, val, count, off); + if (vbasedev->proxy != NULL) { + ret = iswrite ? + vfio_user_region_write(vbasedev, region->nr, off, count, val) : + vfio_user_region_read(vbasedev, region->nr, off, count, val); + } else { + off += region->fd_offset; + ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : + pread(vbasedev->fd, val, count, off); + } if (ret < count) { error_report("vfio_mig_%s %d byte %s: failed at offset 0x%" HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count, @@ -111,9 +120,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, uint32_t value) { VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; - off_t dev_state_off = region->fd_offset + - VFIO_MIG_STRUCT_OFFSET(device_state); + off_t dev_state_off = VFIO_MIG_STRUCT_OFFSET(device_state); uint32_t device_state; int ret; @@ -201,13 +208,13 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) int ret; ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); + VFIO_MIG_STRUCT_OFFSET(data_offset)); if (ret < 0) { return ret; } ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); + VFIO_MIG_STRUCT_OFFSET(data_size)); if (ret < 0) { return ret; } @@ -233,8 +240,7 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) } buf_allocated = true; - ret = vfio_mig_read(vbasedev, buf, sec_size, - region->fd_offset + data_offset); + ret = vfio_mig_read(vbasedev, buf, sec_size, data_offset); if (ret < 0) { g_free(buf); return ret; @@ -269,7 +275,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, do { ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); + VFIO_MIG_STRUCT_OFFSET(data_offset)); if (ret < 0) { return ret; } @@ -309,8 +315,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, qemu_get_buffer(f, buf, sec_size); if (buf_alloc) { - ret = vfio_mig_write(vbasedev, buf, sec_size, - region->fd_offset + data_offset); + ret = vfio_mig_write(vbasedev, buf, sec_size, data_offset); g_free(buf); if (ret < 0) { @@ -322,7 +327,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, } ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); + VFIO_MIG_STRUCT_OFFSET(data_size)); if (ret < 0) { return ret; } @@ -334,12 +339,11 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, static int vfio_update_pending(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; uint64_t pending_bytes = 0; int ret; ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); if (ret < 0) { migration->pending_bytes = 0; return ret; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 4b933ed10f..976fb89786 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3658,6 +3658,13 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) } } + if (!pdev->failover_pair_id) { + ret = vfio_migration_probe(&vdev->vbasedev, errp); + if (ret) { + error_report("%s: Migration disabled", vdev->vbasedev.name); + } + } + vfio_register_err_notifier(vdev); vfio_register_req_notifier(vdev); @@ -3709,6 +3716,11 @@ static void vfio_user_pci_reset(DeviceState *dev) static Property vfio_user_pci_dev_properties[] = { DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name), DEFINE_PROP_BOOL("secure-dma", VFIOUserPCIDevice, secure_dma, false), + DEFINE_PROP_BOOL("x-enable-migration", VFIOPCIDevice, + vbasedev.enable_migration, false), + DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, + vbasedev.pre_copy_dirty_page_tracking, + ON_OFF_AUTO_ON), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/vfio/user.c b/hw/vfio/user.c index 7de2125346..486f7c0fe7 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -1057,3 +1057,48 @@ void vfio_user_reset(VFIODevice *vbasedev) error_printf("reset reply error %d\n", msg.error_reply); } } + +int vfio_user_dirty_bitmap(VFIOProxy *proxy, + struct vfio_iommu_type1_dirty_bitmap *cmd, + struct vfio_iommu_type1_dirty_bitmap_get *dbitmap) +{ + g_autofree struct { + VFIOUserDirtyPages msg; + VFIOUserBitmapRange range; + } *msgp = NULL; + int msize, rsize; + + /* + * If just the command is sent, the returned bitmap isn't needed. + * The bitmap structs are different from the ioctl() versions, + * ioctl() returns the bitmap in a local VA + */ + if (dbitmap != NULL) { + msize = sizeof(*msgp); + rsize = msize + dbitmap->bitmap.size; + msgp = g_malloc0(rsize); + msgp->range.iova = dbitmap->iova; + msgp->range.size = dbitmap->size; + msgp->range.bitmap.pgsize = dbitmap->bitmap.pgsize; + msgp->range.bitmap.size = dbitmap->bitmap.size; + } else { + msize = rsize = sizeof(VFIOUserDirtyPages); + msgp = g_malloc0(rsize); + } + + vfio_user_request_msg(&msgp->msg.hdr, VFIO_USER_DIRTY_PAGES, msize, 0); + msgp->msg.argsz = msize - sizeof(msgp->msg.hdr); + msgp->msg.flags = cmd->flags; + + vfio_user_send_recv(proxy, &msgp->msg.hdr, NULL, rsize, 0); + if (msgp->msg.hdr.flags & VFIO_USER_ERROR) { + return -msgp->msg.hdr.error_reply; + } + + if (dbitmap != NULL) { + memcpy(dbitmap->bitmap.data, &msgp->range.bitmap.data, + dbitmap->bitmap.size); + } + + return 0; +} -- 2.25.1