> From: Jason Gunthorpe <[email protected]>
> Sent: Wednesday, June 14, 2023 8:17 PM
> 
> On Wed, Jun 14, 2023 at 10:35:10AM +0000, Liu, Yi L wrote:
> 
> > > - if (fill->cur == fill->max)
> > > -         return -EAGAIN; /* Something changed, try again */
> > > + if (fill->devices_end >= fill->devices)
> > > +         return -ENOSPC;
> >
> > This should be fill->devices_end <= fill->devices.
> 
> Yep
> 
> > Even if it's corrected, the
> > new code does not return -EAGAIN.
> 
> Right, there is no EAGAIN. If the caller didn't provide enough space
> the previous version returned ENOSPC:
> 
> > > - if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
> > > -         ret = -ENOSPC;
> > > -         hdr.count = fill.max;
> > > -         goto reset_info_exit;
> > > - }
> 
> -EAGAIN basically means the kernel internally malfunctioned - eg it
> allocated too little space for the actual size of devices. That is no
> longer possible in this version so it should never return -EAGAIN.

I still have one doubt. As I understand it, the -EAGAIN was there to handle
devices being newly plugged in during the info reporting path. I don't think
holding the dev_set lock can prevent that, but maybe -ENOSPC is enough. @Alex,
what is your opinion?

> > And if it returns -ENOSPC, the expected
> > size should be returned, but I didn't see that. As the hunk below[1] is
> > removed, there
> > seems to be no way to know how much memory it requires.
> 
> Yes, I missed that, it should keep counting
> 
> Like this then
> 
> diff --git a/drivers/vfio/pci/vfio_pci_core.c 
> b/drivers/vfio/pci/vfio_pci_core.c
> index b0eadafcbcf502..05c064896a7a94 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -775,19 +775,25 @@ static int vfio_pci_count_devs(struct pci_dev *pdev, 
> void
> *data)
>  }
> 
>  struct vfio_pci_fill_info {
> -     int max;
> -     int cur;
> -     struct vfio_pci_dependent_device *devices;
> +     struct vfio_pci_dependent_device __user *devices;
> +     struct vfio_pci_dependent_device __user *devices_end;
>       struct vfio_device *vdev;
> +     u32 count;
>       u32 flags;
>  };
> 
>  static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
>  {
> +     struct vfio_pci_dependent_device info = {
> +             .segment = pci_domain_nr(pdev->bus),
> +             .bus = pdev->bus->number,
> +             .devfn = pdev->devfn,
> +     };
>       struct vfio_pci_fill_info *fill = data;
> 
> -     if (fill->cur == fill->max)
> -             return -EAGAIN; /* Something changed, try again */
> +     fill.count++;
> +     if (fill->devices >= fill->devices_end)
> +             return 0;
> 
>       if (fill->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID) {
>               struct iommufd_ctx *iommufd = 
> vfio_iommufd_device_ictx(fill->vdev);
> @@ -800,12 +806,12 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, 
> void *data)
>                */
>               vdev = vfio_find_device_in_devset(dev_set, &pdev->dev);
>               if (!vdev)
> -                     fill->devices[fill->cur].devid = 
> VFIO_PCI_DEVID_NOT_OWNED;
> +                     info.devid = VFIO_PCI_DEVID_NOT_OWNED;
>               else
> -                     fill->devices[fill->cur].devid =
> -                             vfio_iommufd_device_hot_reset_devid(vdev, 
> iommufd);
> +                     info.devid = vfio_iommufd_device_hot_reset_devid(
> +                             vdev, iommufd);
>               /* If devid is VFIO_PCI_DEVID_NOT_OWNED, clear owned flag. */
> -             if (fill->devices[fill->cur].devid == VFIO_PCI_DEVID_NOT_OWNED)
> +             if (info.devid == VFIO_PCI_DEVID_NOT_OWNED)
>                       fill->flags &= ~VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED;
>       } else {
>               struct iommu_group *iommu_group;
> @@ -814,13 +820,13 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, 
> void *data)
>               if (!iommu_group)
>                       return -EPERM; /* Cannot reset non-isolated devices */
> 
> -             fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
> +             info.group_id = iommu_group_id(iommu_group);
>               iommu_group_put(iommu_group);
>       }
> -     fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
> -     fill->devices[fill->cur].bus = pdev->bus->number;
> -     fill->devices[fill->cur].devfn = pdev->devfn;
> -     fill->cur++;
> +
> +     if (copy_to_user(fill->devices, &info, sizeof(info)))
> +             return -EFAULT;
> +     fill->devices++;
>       return 0;
>  }
> 
> @@ -1212,8 +1218,7 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
>       unsigned long minsz =
>               offsetofend(struct vfio_pci_hot_reset_info, count);
>       struct vfio_pci_hot_reset_info hdr;
> -     struct vfio_pci_fill_info fill = { 0 };
> -     struct vfio_pci_dependent_device *devices = NULL;
> +     struct vfio_pci_fill_info fill = {};
>       bool slot = false;
>       int ret = 0;
> 
> @@ -1231,29 +1236,9 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
>       else if (pci_probe_reset_bus(vdev->pdev->bus))
>               return -ENODEV;
> 
> -     /* How many devices are affected? */
> -     ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
> -                                         &fill.max, slot);
> -     if (ret)
> -             return ret;
> -
> -     WARN_ON(!fill.max); /* Should always be at least one */
> -
> -     /*
> -      * If there's enough space, fill it now, otherwise return -ENOSPC and
> -      * the number of devices affected.
> -      */
> -     if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
> -             ret = -ENOSPC;
> -             hdr.count = fill.max;
> -             goto reset_info_exit;
> -     }
> -
> -     devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
> -     if (!devices)
> -             return -ENOMEM;
> -
> -     fill.devices = devices;
> +     fill.devices = arg->devices;
> +     fill.devices_end = arg->devices +
> +                        (hdr.argsz - sizeof(hdr)) / sizeof(arg->devices[0]);
>       fill.vdev = &vdev->vdev;
> 
>       if (vfio_device_cdev_opened(&vdev->vdev))
> @@ -1264,29 +1249,17 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
>       ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_fill_devs,
>                                           &fill, slot);
>       mutex_unlock(&vdev->vdev.dev_set->lock);
> +     if (ret)
> +             return ret;
> 
> -     /*
> -      * If a device was removed between counting and filling, we may come up
> -      * short of fill.max.  If a device was added, we'll have a return of
> -      * -EAGAIN above.
> -      */
> -     if (!ret) {
> -             hdr.count = fill.cur;
> -             hdr.flags = fill.flags;
> -     }
> -
> -reset_info_exit:
> +     hdr.count = fill.count;
> +     hdr.flags = fill.flags;
>       if (copy_to_user(arg, &hdr, minsz))
> -             ret = -EFAULT;
> +             return -EFAULT;
> 
> -     if (!ret) {
> -             if (copy_to_user(&arg->devices, devices,
> -                              hdr.count * sizeof(*devices)))
> -                     ret = -EFAULT;
> -     }
> -
> -     kfree(devices);
> -     return ret;
> +     if (fill.count != fill.devices - arg->devices)

Should be "if (fill.count != (fill.devices - arg->devices) / 
sizeof(arg->devices[0]))" 😊

Regards,
Yi Liu

> +             return -ENOSPC;
> +     return 0;
>  }
> 
>  static int

Reply via email to