On Fri, Sep 20, 2024 at 12:07:34PM +0200, Christian König wrote:
> Am 20.09.24 um 00:35 schrieb Michał Winiarski:
> > VF MMIO resource reservation, either created by system firmware and
> > inherited by Linux PCI subsystem or created by the subsystem itself,
> > contains enough space to fit the BAR of all SR-IOV Virtual Functions
> > that can potentially be created (total VFs supported by the device).
> > This can be leveraged when the device is exposing lower than optimal BAR
> > size as a default, allowing access to the entire resource when lower
> > number of VFs are created.
> > It is achieved by dynamically resizing the BAR to largest possible value
> > that allows to fit all newly created VFs within the original resource
> > boundary.
> > 
> > Signed-off-by: Michał Winiarski <[email protected]>
> > ---
> >   drivers/pci/iov.c   | 92 ++++++++++++++++++++++++++++++++++++++++++++-
> >   drivers/pci/pci.h   |  1 +
> >   include/linux/pci.h |  3 ++
> >   3 files changed, 95 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
> > index e8ccd2ae0f024..d88efbfa70e42 100644
> > --- a/drivers/pci/iov.c
> > +++ b/drivers/pci/iov.c
> > @@ -181,6 +181,86 @@ bool pci_iov_memory_decoding_enabled(struct pci_dev 
> > *dev)
> >     return cmd & PCI_SRIOV_CTRL_MSE;
> >   }
> > +static void pci_iov_resource_do_extend(struct pci_dev *dev, int resno, u16 
> > num_vfs)
> > +{
> > +   resource_size_t size;
> > +   int ret, old, i;
> > +   u32 sizes;
> > +
> > +   pci_config_pm_runtime_get(dev);
> > +
> > +   if (pci_iov_memory_decoding_enabled(dev)) {
> > +           ret = -EBUSY;
> > +           goto err;
> > +   }
> > +
> > +   sizes = pci_rebar_get_possible_sizes(dev, resno);
> > +   if (!sizes) {
> > +           ret = -ENOTSUPP;
> > +           goto err;
> > +   }
> > +
> > +   old = pci_rebar_get_current_size(dev, resno);
> > +   if (old < 0) {
> > +           ret = old;
> > +           goto err;
> > +   }
> > +
> > +   while (sizes > 0) {
> > +           i = __fls(sizes);
> > +           size = pci_rebar_size_to_bytes(i);
> > +           if (size * num_vfs <= pci_resource_len(dev, resno)) {
> > +                   if (i != old) {
> > +                           ret = pci_rebar_set_size(dev, resno, size);
> > +                           if (ret)
> > +                                   goto err;
> > +
> > +                           pci_iov_resource_set_size(dev, resno, size);
> > +                           pci_iov_update_resource(dev, resno);
> > +                   }
> > +                   break;
> > +           }
> > +           sizes &= ~BIT(i);
> > +   }
> > +
> > +   pci_config_pm_runtime_put(dev);
> > +
> > +   return;
> > +
> > +err:
> > +   dev_WARN(&dev->dev, "Failed to extend %s: %d\n",
> > +            pci_resource_name(dev, resno), ret);
> > +
> > +   pci_config_pm_runtime_put(dev);
> > +}
> > +
> > +static void pci_iov_resource_do_restore(struct pci_dev *dev, int resno)
> > +{
> > +   if (dev->sriov->rebar_extend[resno - PCI_IOV_RESOURCES])
> > +           pci_iov_resource_do_extend(dev, resno, dev->sriov->total_VFs);
> > +}
> > +
> > +int pci_iov_resource_extend(struct pci_dev *dev, int resno, bool enable)
> > +{
> > +   if (!pci_resource_is_iov(dev, resno)) {
> > +           dev_WARN(&dev->dev, "%s is not an IOV resource\n",
> > +                    pci_resource_name(dev, resno));
> > +
> > +           return -ENODEV;
> > +   }
> > +
> > +   if (!pci_rebar_get_possible_sizes(dev, resno))
> > +           return -ENOTSUPP;
> > +
> > +   if (!enable)
> > +           pci_iov_resource_do_restore(dev, resno);
> > +
> > +   dev->sriov->rebar_extend[resno - PCI_IOV_RESOURCES] = enable;
> > +
> > +   return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(pci_iov_resource_extend);
> > +
> >   static void pci_read_vf_config_common(struct pci_dev *virtfn)
> >   {
> >     struct pci_dev *physfn = virtfn->physfn;
> > @@ -445,7 +525,7 @@ static ssize_t sriov_numvfs_store(struct device *dev,
> >                               const char *buf, size_t count)
> >   {
> >     struct pci_dev *pdev = to_pci_dev(dev);
> > -   int ret = 0;
> > +   int i, ret = 0;
> >     u16 num_vfs;
> >     if (kstrtou16(buf, 0, &num_vfs) < 0)
> > @@ -487,6 +567,11 @@ static ssize_t sriov_numvfs_store(struct device *dev,
> >             goto exit;
> >     }
> > +   for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> > +           if (pdev->sriov->rebar_extend[i])
> > +                   pci_iov_resource_do_extend(pdev, i + PCI_IOV_RESOURCES, 
> > num_vfs);
> > +   }
> > +
> 
> That sounds like a really bad idea to me.
> 
> Basically the suggestion is here that the PCI subsystem should silently
> extend and shrink the VF BARs when the number of VFs change?

Why do you think it's a bad idea? Everything is under PCI subsystem
control and the driver in charge has to explicitly opt-in to this
behavior on a per-BAR basis.

> Bjorn has the last word on that but I would say that instead the driver
> owning the PCIe device as hypervisor should resize the VF BARs to a desired
> size and that in turn restricts the number of VFs you can enable.

Then the PCI subsystem would silently change the driver_max_VFs (or new
variable, as driver_max_VFs is under PF control, so it's either new var
or checking VF BAR size in pci_sriov_set_totalvfs).
It also means that we have to do the maths to calculate the new VF limit
in both PCI subsystem and the caller.

We can go this route as well - I just think it's cleaner to keep this
all under PCI subsystem control.

I'll keep the current behavior in v3, but I'm open to changing it.

Thanks,
-Michał

> 
> Regards,
> Christian.
> 
> >     ret = pdev->driver->sriov_configure(pdev, num_vfs);
> >     if (ret < 0)
> >             goto exit;
> > @@ -881,8 +966,13 @@ static int sriov_init(struct pci_dev *dev, int pos)
> >   static void sriov_release(struct pci_dev *dev)
> >   {
> > +   int i;
> > +
> >     BUG_ON(dev->sriov->num_VFs);
> > +   for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
> > +           pci_iov_resource_do_restore(dev, i + PCI_IOV_RESOURCES);
> > +
> >     if (dev != dev->sriov->dev)
> >             pci_dev_put(dev->sriov->dev);
> > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> > index e763b3fd4c7a2..47ed2633232aa 100644
> > --- a/drivers/pci/pci.h
> > +++ b/drivers/pci/pci.h
> > @@ -385,6 +385,7 @@ struct pci_sriov {
> >     u16             subsystem_vendor; /* VF subsystem vendor */
> >     u16             subsystem_device; /* VF subsystem device */
> >     resource_size_t barsz[PCI_SRIOV_NUM_BARS];      /* VF BAR size */
> > +   bool            rebar_extend[PCI_SRIOV_NUM_BARS];       /* Resize VF 
> > BAR */
> >     bool            drivers_autoprobe; /* Auto probing of VFs by driver */
> >   };
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 4cf89a4b4cbcf..c007119da7b3d 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -2364,6 +2364,7 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 
> > numvfs);
> >   int pci_sriov_get_totalvfs(struct pci_dev *dev);
> >   int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn);
> >   resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno);
> > +int pci_iov_resource_extend(struct pci_dev *dev, int resno, bool enable);
> >   void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe);
> >   /* Arch may override these (weak) */
> > @@ -2416,6 +2417,8 @@ static inline int pci_sriov_get_totalvfs(struct 
> > pci_dev *dev)
> >   #define pci_sriov_configure_simple        NULL
> >   static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, 
> > int resno)
> >   { return 0; }
> > +static inline void pci_iov_resource_extend(struct pci_dev *dev, int resno, 
> > bool enable)
> > +{ return -ENODEV; }
> >   static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool 
> > probe) { }
> >   #endif
> 

Reply via email to