On 27/2/23 22:11, Yi Liu wrote:
This adds an ioctl for userspace to bind a device cdev fd to an iommufd.

     VFIO_DEVICE_BIND_IOMMUFD: bind device to an iommufd, hence gain DMA
                              control provided by the iommufd. open_device
                              op is called after bind_iommufd op.
                              VFIO no iommu mode is indicated by passing
                              a negative iommufd value.

Signed-off-by: Yi Liu <[email protected]>
---
  drivers/vfio/device_cdev.c | 146 +++++++++++++++++++++++++++++++++++++
  drivers/vfio/vfio.h        |  17 ++++-
  drivers/vfio/vfio_main.c   |  54 ++++++++++++--
  include/linux/iommufd.h    |   6 ++
  include/uapi/linux/vfio.h  |  34 +++++++++
  5 files changed, 248 insertions(+), 9 deletions(-)

diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
index 9e2c1ecaaf4f..37f80e368551 100644
--- a/drivers/vfio/device_cdev.c
+++ b/drivers/vfio/device_cdev.c
@@ -3,6 +3,7 @@
   * Copyright (c) 2023 Intel Corporation.
   */
  #include <linux/vfio.h>
+#include <linux/iommufd.h>
  #include "vfio.h"
@@ -45,6 +46,151 @@ int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep)
        return ret;
  }
+static void vfio_device_get_kvm_safe(struct vfio_device_file *df)
+{
+       spin_lock(&df->kvm_ref_lock);
+       if (!df->kvm)
+               goto unlock;
+
+       _vfio_device_get_kvm_safe(df->device, df->kvm);
+
+unlock:
+       spin_unlock(&df->kvm_ref_lock);
+}
+
+void vfio_device_cdev_close(struct vfio_device_file *df)
+{
+       struct vfio_device *device = df->device;
+
+       mutex_lock(&device->dev_set->lock);
+       /*
+        * As df->access_granted writer is under dev_set->lock as well,
+        * so this read no need to use smp_load_acquire() to pair with
+        * smp_store_release() in the caller of vfio_device_open().
+        */
+       if (!df->access_granted) {
+               mutex_unlock(&device->dev_set->lock);
+               return;
+       }
+       vfio_device_close(df);
+       vfio_device_put_kvm(device);
+       if (df->iommufd)
+               iommufd_ctx_put(df->iommufd);
+       mutex_unlock(&device->dev_set->lock);
+       vfio_device_unblock_group(device);
+}
+
+static struct iommufd_ctx *vfio_get_iommufd_from_fd(int fd)
+{
+       struct fd f;
+       struct iommufd_ctx *iommufd;
+
+       f = fdget(fd);
+       if (!f.file)
+               return ERR_PTR(-EBADF);
+
+       iommufd = iommufd_ctx_from_file(f.file);
+
+       fdput(f);
+       return iommufd;
+}
+
+long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df,
+                                   unsigned long arg)
+{
+       struct vfio_device *device = df->device;
+       struct vfio_device_bind_iommufd bind;
+       struct iommufd_ctx *iommufd = NULL;
+       unsigned long minsz;
+       int ret;
+
+       minsz = offsetofend(struct vfio_device_bind_iommufd, out_devid);
+
+       if (copy_from_user(&bind, (void __user *)arg, minsz))
+               return -EFAULT;
+
+       if (bind.argsz < minsz || bind.flags)
+               return -EINVAL;
+
+       if (!device->ops->bind_iommufd)
+               return -ENODEV;
+
+       ret = vfio_device_block_group(device);
+       if (ret)
+               return ret;
+
+       mutex_lock(&device->dev_set->lock);
+       /*
+        * If already been bound to an iommufd, or already set noiommu
+        * then fail it.
+        */
+       if (df->iommufd || df->noiommu) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       /* iommufd < 0 means noiommu mode */
+       if (bind.iommufd < 0) {
+               if (!capable(CAP_SYS_RAWIO)) {
+                       ret = -EPERM;
+                       goto out_unlock;
+               }
+               df->noiommu = true;
+       } else {
+               iommufd = vfio_get_iommufd_from_fd(bind.iommufd);
+               if (IS_ERR(iommufd)) {
+                       ret = PTR_ERR(iommufd);
+                       goto out_unlock;
+               }
+       }
+
+       /*
+        * Before the device open, get the KVM pointer currently
+        * associated with the device file (if there is) and obtain
+        * a reference.  This reference is held until device closed.
+        * Save the pointer in the device for use by drivers.
+        */
+       vfio_device_get_kvm_safe(df);
+
+       df->iommufd = iommufd;
+       ret = vfio_device_open(df, &bind.out_devid, NULL);


This is unrelated to this patch, but it reminded me of something: while debugging QEMU, vfio_assert_device_open() kept firing as I was killing QEMU (which in turn made the kernel close all fds). device->open_count was 0 because QEMU was dying before it called ioctl(VFIO_DEVICE_BIND_IOMMUFD), which is what would have called the vfio_device_open() just above. Has this been reported or fixed? Just curious.



--
Alexey

Reply via email to