From: John G Johnson <john.g.john...@oracle.com> Send migration region operations to remote server. Send VFIO_USER_DIRTY_PAGES to get remote dirty bitmap.
Signed-off-by: John G Johnson <john.g.john...@oracle.com> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> --- hw/vfio/user.h | 17 +++++++++++++++ hw/vfio/common.c | 51 ++++++++++++++++++++++++++++++++++++--------- hw/vfio/migration.c | 35 ++++++++++++++++++------------- hw/vfio/pci.c | 7 +++++++ hw/vfio/user.c | 45 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 130 insertions(+), 25 deletions(-) diff --git a/hw/vfio/user.h b/hw/vfio/user.h index 95c2fb1707..eeb328c0a9 100644 --- a/hw/vfio/user.h +++ b/hw/vfio/user.h @@ -230,6 +230,20 @@ struct vfio_user_irq_set { uint32_t count; }; +/* imported from struct vfio_iommu_type1_dirty_bitmap_get */ +struct vfio_user_bitmap_range { + uint64_t iova; + uint64_t size; + struct vfio_user_bitmap bitmap; +}; + +/* imported from struct vfio_iommu_type1_dirty_bitmap */ +struct vfio_user_dirty_pages { + vfio_user_hdr_t hdr; + uint32_t argsz; + uint32_t flags; +}; + void vfio_user_recv(void *opaque); void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret); VFIOProxy *vfio_user_connect_dev(char *sockname, Error **errp); @@ -255,4 +269,7 @@ void vfio_user_set_reqhandler(VFIODevice *vbasdev, void *reqarg); int vfio_user_set_irqs(VFIODevice *vbasedev, struct vfio_irq_set *irq); void vfio_user_reset(VFIODevice *vbasedev); +int vfio_user_dirty_bitmap(VFIOProxy *proxy, + struct vfio_iommu_type1_dirty_bitmap *bitmap, + struct vfio_iommu_type1_dirty_bitmap_get *range); #endif /* VFIO_USER_H */ diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 953d9e7b55..bd31731c0f 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -460,7 +460,11 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, goto unmap_exit; } - ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); + if (container->proxy != NULL) { + ret = vfio_user_dma_unmap(container->proxy, unmap, bitmap); + } else { + ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); + } if (!ret) { 
cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data, iotlb->translated_addr, pages); @@ -1278,10 +1282,19 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; } - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); - if (ret) { - error_report("Failed to set dirty tracking flag 0x%x errno: %d", - dirty.flags, errno); + if (container->proxy != NULL) { + ret = vfio_user_dirty_bitmap(container->proxy, &dirty, NULL); + if (ret) { + error_report("Failed to set dirty tracking flag 0x%x errno: %d", + dirty.flags, -ret); + } + } else { + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); + if (ret) { + error_report("Failed to set dirty tracking flag 0x%x errno: %d", + dirty.flags, errno); + ret = -errno; + } } } @@ -1331,7 +1344,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, goto err_out; } - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); + if (container->proxy != NULL) { + ret = vfio_user_dirty_bitmap(container->proxy, dbitmap, range); + } else { + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); + } if (ret) { error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64 " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova, @@ -2282,6 +2299,12 @@ void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as) VFIOAddressSpace *space; VFIOContainer *container; + if (QLIST_EMPTY(&vfio_group_list)) { + qemu_register_reset(vfio_reset_handler, NULL); + } + + QLIST_INSERT_HEAD(&vfio_group_list, group, next); + /* * try to mirror vfio_connect_container() * as much as possible @@ -2292,18 +2315,26 @@ void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as) container = g_malloc0(sizeof(*container)); container->space = space; container->fd = -1; + QLIST_INIT(&container->giommu_list); QLIST_INIT(&container->hostwin_list); container->proxy = proxy; + /* + * The proxy uses a SW IOMMU 
in lieu of the HW one + * used in the ioctl() version. Use TYPE1 with the + * target's page size for maximum capatibility + */ container->iommu_type = VFIO_TYPE1_IOMMU; - vfio_host_win_add(container, 0, (hwaddr)-1, 4096); - container->pgsizes = 4096; + vfio_host_win_add(container, 0, (hwaddr)-1, TARGET_PAGE_SIZE); + container->pgsizes = TARGET_PAGE_SIZE; + + container->dirty_pages_supported = true; + container->max_dirty_bitmap_size = VFIO_USER_DEF_MAX_XFER; + container->dirty_pgsizes = TARGET_PAGE_SIZE; QLIST_INIT(&container->group_list); QLIST_INSERT_HEAD(&space->containers, container, next); - QLIST_INIT(&container->giommu_list); - group->container = container; QLIST_INSERT_HEAD(&container->group_list, group, container_next); diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 82f654afb6..8005b1171a 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -27,6 +27,7 @@ #include "pci.h" #include "trace.h" #include "hw/hw.h" +#include "user.h" /* * Flags to be used as unique delimiters for VFIO devices in the migration @@ -49,10 +50,18 @@ static int64_t bytes_transferred; static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, off_t off, bool iswrite) { + VFIORegion *region = &vbasedev->migration->region; int ret; - ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : - pread(vbasedev->fd, val, count, off); + if (vbasedev->proxy != NULL) { + ret = iswrite ? + vfio_user_region_write(vbasedev, region->nr, off, count, val) : + vfio_user_region_read(vbasedev, region->nr, off, count, val); + } else { + off += region->fd_offset; + ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : + pread(vbasedev->fd, val, count, off); + } if (ret < count) { error_report("vfio_mig_%s %d byte %s: failed at offset 0x%" HWADDR_PRIx", err: %s", iswrite ? 
"write" : "read", count, @@ -111,9 +120,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, uint32_t value) { VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; - off_t dev_state_off = region->fd_offset + - VFIO_MIG_STRUCT_OFFSET(device_state); + off_t dev_state_off = VFIO_MIG_STRUCT_OFFSET(device_state); uint32_t device_state; int ret; @@ -201,13 +208,13 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) int ret; ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); + VFIO_MIG_STRUCT_OFFSET(data_offset)); if (ret < 0) { return ret; } ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); + VFIO_MIG_STRUCT_OFFSET(data_size)); if (ret < 0) { return ret; } @@ -233,8 +240,7 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) } buf_allocated = true; - ret = vfio_mig_read(vbasedev, buf, sec_size, - region->fd_offset + data_offset); + ret = vfio_mig_read(vbasedev, buf, sec_size, data_offset); if (ret < 0) { g_free(buf); return ret; @@ -269,7 +275,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, do { ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); + VFIO_MIG_STRUCT_OFFSET(data_offset)); if (ret < 0) { return ret; } @@ -309,8 +315,8 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, qemu_get_buffer(f, buf, sec_size); if (buf_alloc) { - ret = vfio_mig_write(vbasedev, buf, sec_size, - region->fd_offset + data_offset); + + ret = vfio_mig_write(vbasedev, buf, sec_size, data_offset); g_free(buf); if (ret < 0) { @@ -322,7 +328,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, } ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), - region->fd_offset + 
VFIO_MIG_STRUCT_OFFSET(data_size)); + VFIO_MIG_STRUCT_OFFSET(data_size)); if (ret < 0) { return ret; } @@ -334,12 +340,11 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, static int vfio_update_pending(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; uint64_t pending_bytes = 0; int ret; ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); if (ret < 0) { migration->pending_bytes = 0; return ret; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index bed8eaa4c2..36f8524e7c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3633,6 +3633,13 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) } } + if (!pdev->failover_pair_id) { + ret = vfio_migration_probe(&vdev->vbasedev, errp); + if (ret) { + error_report("%s: Migration disabled", vdev->vbasedev.name); + } + } + vfio_register_err_notifier(vdev); vfio_register_req_notifier(vdev); diff --git a/hw/vfio/user.c b/hw/vfio/user.c index 8917596a2f..eceaeeccea 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -917,3 +917,48 @@ void vfio_user_reset(VFIODevice *vbasedev) error_printf("reset reply error %d\n", msg.error_reply); } } + +int vfio_user_dirty_bitmap(VFIOProxy *proxy, + struct vfio_iommu_type1_dirty_bitmap *cmd, + struct vfio_iommu_type1_dirty_bitmap_get *dbitmap) +{ + g_autofree struct { + struct vfio_user_dirty_pages msg; + struct vfio_user_bitmap_range range; + } *msgp = NULL; + int msize, rsize; + + /* + * If just the command is sent, the returned bitmap isn't needed. 
+ * The bitmap structs are different from the ioctl() versions, + * ioctl() returns the bitmap in a local VA + */ + if (dbitmap != NULL) { + msize = sizeof(*msgp); + rsize = msize + dbitmap->bitmap.size; + msgp = g_malloc0(rsize); + msgp->range.iova = dbitmap->iova; + msgp->range.size = dbitmap->size; + msgp->range.bitmap.pgsize = dbitmap->bitmap.pgsize; + msgp->range.bitmap.size = dbitmap->bitmap.size; + } else { + msize = rsize = sizeof(struct vfio_user_dirty_pages); + msgp = g_malloc0(rsize); + } + + vfio_user_request_msg(&msgp->msg.hdr, VFIO_USER_DIRTY_PAGES, msize, 0); + msgp->msg.argsz = msize - sizeof(msgp->msg.hdr); + msgp->msg.flags = cmd->flags; + + vfio_user_send_recv(proxy, &msgp->msg.hdr, NULL, rsize); + if (msgp->msg.hdr.flags & VFIO_USER_ERROR) { + return -msgp->msg.hdr.error_reply; + } + + if (dbitmap != NULL) { + memcpy(dbitmap->bitmap.data, &msgp->range.bitmap.data, + dbitmap->bitmap.size); + } + + return 0; +} -- 2.25.1