Hi Shameer, Cedric,

> Subject: [PATCH 3/3] hw/vfio/region: Create dmabuf for PCI BAR per region
> 
> From: Nicolin Chen <[email protected]>
> 
> Linux now provides a VFIO dmabuf exporter to expose PCI BAR memory for
> P2P
> use cases. Create a dmabuf for each mapped BAR region after the mmap is
> set
> up, and store the returned fd in the region's RAMBlock. This allows QEMU
> to
> pass the fd to dma_map_file(), enabling iommufd to import the dmabuf and
> map
> the BAR correctly in the host IOMMU page table.
> 
> If the kernel lacks support or dmabuf setup fails, QEMU skips the setup
> and continues with normal mmap handling.
> 
> Signed-off-by: Nicolin Chen <[email protected]>
> Signed-off-by: Shameer Kolothum <[email protected]>
> ---
>  hw/vfio/region.c     | 57
> +++++++++++++++++++++++++++++++++++++++++++-
>  hw/vfio/trace-events |  1 +
>  2 files changed, 57 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/region.c b/hw/vfio/region.c
> index b165ab0b93..6949f6779c 100644
> --- a/hw/vfio/region.c
> +++ b/hw/vfio/region.c
> @@ -29,6 +29,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/units.h"
>  #include "monitor/monitor.h"
> +#include "system/ramblock.h"
>  #include "vfio-helpers.h"
> 
>  /*
> @@ -238,13 +239,52 @@ static void vfio_subregion_unmap(VFIORegion
> *region, int index)
>      region->mmaps[index].mmap = NULL;
>  }
> 
> +static int vfio_region_create_dma_buf(VFIORegion *region)
Would it make sense to consolidate this implementation with the one from my
series: 
https://lore.kernel.org/qemu-devel/[email protected]/
so that it is a bit more generic and can also be invoked from outside of VFIO?

Or, is it ok to have two dmabuf implementations: one that is internal to VFIO
and takes a VFIORegion as input like this one and another one that takes a
VFIODevice and iovec as input and can be invoked externally?

Thanks,
Vivek

> +{
> +    g_autofree struct vfio_device_feature *feature = NULL;
> +    VFIODevice *vbasedev = region->vbasedev;
> +    struct vfio_device_feature_dma_buf *dma_buf;
> +    size_t total_size;
> +    int i, ret;
> +
> +    g_assert(region->nr_mmaps);
> +
> +    total_size = sizeof(*feature) + sizeof(*dma_buf) +
> +                 sizeof(struct vfio_region_dma_range) * region->nr_mmaps;
> +    feature = g_malloc0(total_size);
> +    *feature = (struct vfio_device_feature) {
> +        .argsz = total_size,
> +        .flags = VFIO_DEVICE_FEATURE_GET |
> VFIO_DEVICE_FEATURE_DMA_BUF,
> +    };
> +
> +    dma_buf = (void *)feature->data;
> +    *dma_buf = (struct vfio_device_feature_dma_buf) {
> +        .region_index = region->nr,
> +        .open_flags = O_RDWR,
> +        .nr_ranges = region->nr_mmaps,
> +    };
> +
> +    for (i = 0; i < region->nr_mmaps; i++) {
> +        dma_buf->dma_ranges[i].offset = region->mmaps[i].offset;
> +        dma_buf->dma_ranges[i].length = region->mmaps[i].size;
> +    }
> +
> +    ret = vbasedev->io_ops->device_feature(vbasedev, feature);
> +    for (i = 0; i < region->nr_mmaps; i++) {
> +        trace_vfio_region_dmabuf(region->vbasedev->name, ret, region->nr,
> +                                 region->mem->name, region->mmaps[i].offset,
> +                                 region->mmaps[i].size);
> +    }
> +    return ret;
> +}
> +
>  int vfio_region_mmap(VFIORegion *region)
>  {
>      int i, ret, prot = 0;
>      char *name;
>      int fd;
> 
> -    if (!region->mem) {
> +    if (!region->mem || !region->nr_mmaps) {
>          return 0;
>      }
> 
> @@ -305,6 +345,21 @@ int vfio_region_mmap(VFIORegion *region)
>                                 region->mmaps[i].size - 1);
>      }
> 
> +    ret = vfio_region_create_dma_buf(region);
> +    if (ret < 0) {
> +        if (ret == -ENOTTY) {
> +            warn_report_once("VFIO dmabuf not supported in kernel");
> +        } else {
> +            error_report("%s: failed to create dmabuf: %s",
> +                         memory_region_name(region->mem), strerror(errno));
> +        }
> +    } else {
> +        MemoryRegion *mr = &region->mmaps[0].mem;
> +        RAMBlock *ram_block = mr->ram_block;
> +
> +        ram_block->fd = ret;
> +    }
> +
>      return 0;
> 
>  no_mmap:
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 1e895448cd..592a0349d4 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -117,6 +117,7 @@ vfio_device_put(int fd) "close vdev->fd=%d"
>  vfio_region_write(const char *name, int index, uint64_t addr, uint64_t
> data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)"
>  vfio_region_read(char *name, int index, uint64_t addr, unsigned size,
> uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64
>  vfio_region_setup(const char *dev, int index, const char *name, unsigned
> long flags, unsigned long offset, unsigned long size) "Device %s, region %d
> \"%s\", flags: 0x%lx, offset: 0x%lx, size: 0x%lx"
> +vfio_region_dmabuf(const char *dev, int fd, int index,  const char *name,
> unsigned long offset, unsigned long size) "Device %s, dmabuf fd %d region
> %d \"%s\", offset: 0x%lx, size: 0x%lx"
>  vfio_region_mmap_fault(const char *name, int index, unsigned long offset,
> unsigned long size, int fault) "Region %s mmaps[%d], [0x%lx - 0x%lx], fault:
> %d"
>  vfio_region_mmap(const char *name, unsigned long offset, unsigned long
> end) "Region %s [0x%lx - 0x%lx]"
>  vfio_region_exit(const char *name, int index) "Device %s, region %d"
> --
> 2.43.0
> 


Reply via email to