From: John G Johnson <john.g.john...@oracle.com> Set up INTx interrupts and a device region info cache for remote device info.
Signed-off-by: John G Johnson <john.g.john...@oracle.com> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> --- include/hw/vfio/vfio-common.h | 1 + hw/vfio/common.c | 33 ++++++++++++++++++- hw/vfio/pci.c | 61 ++++++++++++++++++++++++++++++++--- hw/vfio/user.c | 20 ++++++++++++ 4 files changed, 109 insertions(+), 6 deletions(-) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index d7b717594b..688660c28d 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -56,6 +56,7 @@ typedef struct VFIORegion { uint32_t nr_mmaps; VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ + int remfd; /* fd if exported from remote process */ } VFIORegion; typedef struct VFIOMigration { diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 9b68416599..953d9e7b55 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1571,6 +1571,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, return true; } +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index) +{ + struct vfio_region_info *info; + + if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) { + vfio_get_region_info(vbasedev, index, &info); + } + return vbasedev->regfds != NULL ? vbasedev->regfds[index] : -1; +} + static int vfio_setup_region_sparse_mmaps(VFIORegion *region, struct vfio_region_info *info) { @@ -1624,6 +1634,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, region->size = info->size; region->fd_offset = info->offset; region->nr = index; + region->remfd = vfio_get_region_info_remfd(vbasedev, index); if (region->size) { region->mem = g_new0(MemoryRegion, 1); @@ -1667,6 +1678,7 @@ int vfio_region_mmap(VFIORegion *region) { int i, prot = 0; char *name; + int fd; if (!region->mem) { return 0; @@ -1675,9 +1687,11 @@ int vfio_region_mmap(VFIORegion *region) prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? 
PROT_READ : 0; prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0; + fd = region->remfd != -1 ? region->remfd : region->vbasedev->fd; + for (i = 0; i < region->nr_mmaps; i++) { region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, - MAP_SHARED, region->vbasedev->fd, + MAP_SHARED, fd, region->fd_offset + region->mmaps[i].offset); if (region->mmaps[i].mmap == MAP_FAILED) { @@ -2524,6 +2538,23 @@ int vfio_get_device(VFIOGroup *group, const char *name, void vfio_put_base_device(VFIODevice *vbasedev) { + if (vbasedev->regions != NULL) { + int i; + + for (i = 0; i < vbasedev->num_regions; i++) { + if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) { + close(vbasedev->regfds[i]); + } + g_free(vbasedev->regions[i]); + } + g_free(vbasedev->regions); + vbasedev->regions = NULL; + if (vbasedev->regfds != NULL) { + g_free(vbasedev->regfds); + vbasedev->regfds = NULL; + } + } + if (!vbasedev->group) { return; } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 3362e8f3f5..52af5a1061 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -256,11 +256,16 @@ static void vfio_irqchip_change(Notifier *notify, void *data) static int vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) { - uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); + uint8_t pin; Error *err = NULL; int32_t fd; int ret; + if (vdev->vbasedev.proxy != NULL) { + pin = vdev->pdev.config[PCI_INTERRUPT_PIN]; + } else { + pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); + } if (!pin) { return 0; @@ -1258,10 +1263,15 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) int ret, entries; Error *err = NULL; - if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), - vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { - error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); - return -errno; + if (vdev->vbasedev.proxy != NULL) { + /* during setup, config space was initialized from remote */ + memcpy(&ctrl, vdev->pdev.config + 
pos + PCI_CAP_FLAGS, sizeof(ctrl)); + } else { + if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), + vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { + error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); + return -errno; + } } ctrl = le16_to_cpu(ctrl); @@ -3562,9 +3572,50 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) goto error; } + vfio_bars_register(vdev); + + ret = vfio_add_capabilities(vdev, errp); + if (ret) { + goto out_teardown; + } + + /* QEMU emulates all of MSI & MSIX */ + if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { + memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, + MSIX_CAP_LENGTH); + } + + if (pdev->cap_present & QEMU_PCI_CAP_MSI) { + memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff, + vdev->msi_cap_size); + } + + if (vdev->pdev.config[PCI_INTERRUPT_PIN] != 0) { + vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + vfio_intx_mmap_enable, vdev); + pci_device_set_intx_routing_notifier(&vdev->pdev, + vfio_intx_routing_notifier); + vdev->irqchip_change_notifier.notify = vfio_irqchip_change; + kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); + ret = vfio_intx_enable(vdev, errp); + if (ret) { + goto out_deregister; + } + } + + vfio_register_err_notifier(vdev); + vfio_register_req_notifier(vdev); + return; +out_deregister: + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); +out_teardown: + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); error: + vfio_user_disconnect(proxy); error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); } diff --git a/hw/vfio/user.c b/hw/vfio/user.c index 6afbde8ba8..0fd7e01986 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -574,6 +574,16 @@ VFIOProxy *vfio_user_connect_dev(char *sockname, Error **errp) return proxy; } +static void vfio_user_cb(void *opaque) +{ + VFIOProxy *proxy = opaque; + + qemu_mutex_lock(&proxy->lock); + proxy->state = CLOSED; + 
qemu_mutex_unlock(&proxy->lock); + qemu_cond_signal(&proxy->close_cv); +} + void vfio_user_disconnect(VFIOProxy *proxy) { VFIOUserReply *r1, *r2; @@ -601,6 +611,16 @@ void vfio_user_disconnect(VFIOProxy *proxy) g_free(r1); } + /* + * Make sure the iothread isn't blocking anywhere + * with a ref to this proxy by waiting for a BH + * handler to run after the proxy fd handlers were + * deleted above. + */ + proxy->close_wait = 1; + aio_bh_schedule_oneshot(iothread_get_aio_context(vfio_user_iothread), + vfio_user_cb, proxy); + /* drop locks so the iothread can make progress */ qemu_mutex_unlock_iothread(); qemu_cond_wait(&proxy->close_cv, &proxy->lock); -- 2.25.1