On Tue, Feb 01, 2022 at 06:25:08PM +0200, Oleksandr Andrushchenko wrote:
> From: Roger Pau Monne <[email protected]>
>
> This way the lock can be used to check whether vpci is present, and
> removal can be performed while holding the lock, in order to make
> sure there are no accesses to the contents of the vpci struct.
> Previously removal could race with vpci_read for example, since the
> lock was dropped prior to freeing pdev->vpci.
>
> Signed-off-by: Roger Pau Monné <[email protected]>
> Signed-off-by: Oleksandr Andrushchenko <[email protected]>
> ---
> Cc: Andrew Cooper <[email protected]>
> Cc: Jan Beulich <[email protected]>
> Cc: Julien Grall <[email protected]>
> Cc: Stefano Stabellini <[email protected]>
> ---
> New in v5 of this series: this is an updated version of the patch published at
> https://lore.kernel.org/xen-devel/[email protected]/
>
> Changes since v5:
> - do not split code into vpci_remove_device_handlers_locked yet
> - move INIT_LIST_HEAD outside the locked region (Jan)
> - stripped out locking optimizations for vpci_{read|write} into a
> dedicated patch
> Changes since v2:
> - fixed pdev->vpci = xzalloc(struct vpci); under spin_lock (Jan)
> Changes since v1:
> - Assert that vpci_lock is locked in vpci_remove_device_locked.
> - Remove double newline.
> - Shrink critical section in vpci_{read/write}.
> ---
> tools/tests/vpci/emul.h | 5 ++-
> tools/tests/vpci/main.c | 4 +--
> xen/arch/x86/hvm/vmsi.c | 8 ++---
> xen/drivers/passthrough/pci.c | 1 +
> xen/drivers/vpci/header.c | 21 ++++++++----
> xen/drivers/vpci/msi.c | 11 ++++--
> xen/drivers/vpci/msix.c | 8 ++---
> xen/drivers/vpci/vpci.c | 63 ++++++++++++++++++++++-------------
> xen/include/xen/pci.h | 1 +
> xen/include/xen/vpci.h | 3 +-
> 10 files changed, 78 insertions(+), 47 deletions(-)
>
> diff --git a/tools/tests/vpci/emul.h b/tools/tests/vpci/emul.h
> index 2e1d3057c9d8..d018fb5eef21 100644
> --- a/tools/tests/vpci/emul.h
> +++ b/tools/tests/vpci/emul.h
> @@ -44,6 +44,7 @@ struct domain {
> };
>
> struct pci_dev {
> + bool vpci_lock;
> struct vpci *vpci;
> };
>
> @@ -53,10 +54,8 @@ struct vcpu
> };
>
> extern const struct vcpu *current;
> -extern const struct pci_dev test_pdev;
> +extern struct pci_dev test_pdev;
>
> -typedef bool spinlock_t;
> -#define spin_lock_init(l) (*(l) = false)
> #define spin_lock(l) (*(l) = true)
> #define spin_unlock(l) (*(l) = false)
>
> diff --git a/tools/tests/vpci/main.c b/tools/tests/vpci/main.c
> index b9a0a6006bb9..26c95b08b6b1 100644
> --- a/tools/tests/vpci/main.c
> +++ b/tools/tests/vpci/main.c
> @@ -23,7 +23,8 @@ static struct vpci vpci;
>
> const static struct domain d;
>
> -const struct pci_dev test_pdev = {
> +struct pci_dev test_pdev = {
> + .vpci_lock = false,
Nit: vpci_lock will already be initialized to false by default, so
this is redundant.
> .vpci = &vpci,
> };
>
> @@ -158,7 +159,6 @@ main(int argc, char **argv)
> int rc;
>
> INIT_LIST_HEAD(&vpci.handlers);
> - spin_lock_init(&vpci.lock);
>
> VPCI_ADD_REG(vpci_read32, vpci_write32, 0, 4, r0);
> VPCI_READ_CHECK(0, 4, r0);
> diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
> index 13e2a190b439..1f7a37f78264 100644
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -910,14 +910,14 @@ int vpci_msix_arch_print(const struct vpci_msix *msix)
> {
> struct pci_dev *pdev = msix->pdev;
>
> - spin_unlock(&msix->pdev->vpci->lock);
> + spin_unlock(&msix->pdev->vpci_lock);
> process_pending_softirqs();
> /* NB: we assume that pdev cannot go away for an alive domain. */
> - if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
> + if ( !spin_trylock(&pdev->vpci_lock) )
> return -EBUSY;
> - if ( pdev->vpci->msix != msix )
> + if ( !pdev->vpci || pdev->vpci->msix != msix )
> {
> - spin_unlock(&pdev->vpci->lock);
> + spin_unlock(&pdev->vpci_lock);
> return -EAGAIN;
> }
> }
> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
> index 1fad80362f0e..af648c6a19b5 100644
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -328,6 +328,7 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg,
> u8 bus, u8 devfn)
> *((u8*) &pdev->bus) = bus;
> *((u8*) &pdev->devfn) = devfn;
> pdev->domain = NULL;
> + spin_lock_init(&pdev->vpci_lock);
>
> arch_pci_init_pdev(pdev);
>
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index 40ff79c33f8f..bd23c0274d48 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -142,12 +142,13 @@ bool vpci_process_pending(struct vcpu *v)
> if ( rc == -ERESTART )
> return true;
>
> - spin_lock(&v->vpci.pdev->vpci->lock);
> - /* Disable memory decoding unconditionally on failure. */
> - modify_decoding(v->vpci.pdev,
> - rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd,
> - !rc && v->vpci.rom_only);
> - spin_unlock(&v->vpci.pdev->vpci->lock);
> + spin_lock(&v->vpci.pdev->vpci_lock);
> + if ( v->vpci.pdev->vpci )
> + /* Disable memory decoding unconditionally on failure. */
> + modify_decoding(v->vpci.pdev,
> + rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY :
> v->vpci.cmd,
> + !rc && v->vpci.rom_only);
> + spin_unlock(&v->vpci.pdev->vpci_lock);
>
> rangeset_destroy(v->vpci.mem);
> v->vpci.mem = NULL;
> @@ -285,6 +286,12 @@ static int modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> continue;
> }
>
> + spin_lock(&tmp->vpci_lock);
> + if ( !tmp->vpci )
> + {
> + spin_unlock(&tmp->vpci_lock);
> + continue;
> + }
> for ( i = 0; i < ARRAY_SIZE(tmp->vpci->header.bars); i++ )
> {
> const struct vpci_bar *bar = &tmp->vpci->header.bars[i];
> @@ -303,12 +310,14 @@ static int modify_bars(const struct pci_dev *pdev,
> uint16_t cmd, bool rom_only)
> rc = rangeset_remove_range(mem, start, end);
> if ( rc )
> {
> + spin_unlock(&tmp->vpci_lock);
> printk(XENLOG_G_WARNING "Failed to remove [%lx, %lx]: %d\n",
> start, end, rc);
> rangeset_destroy(mem);
> return rc;
> }
> }
> + spin_unlock(&tmp->vpci_lock);
> }
>
> ASSERT(dev);
> diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
> index 5757a7aed20f..e3ce46869dad 100644
> --- a/xen/drivers/vpci/msi.c
> +++ b/xen/drivers/vpci/msi.c
> @@ -270,7 +270,7 @@ void vpci_dump_msi(void)
> rcu_read_lock(&domlist_read_lock);
> for_each_domain ( d )
> {
> - const struct pci_dev *pdev;
> + struct pci_dev *pdev;
>
> if ( !has_vpci(d) )
> continue;
> @@ -282,8 +282,13 @@ void vpci_dump_msi(void)
> const struct vpci_msi *msi;
> const struct vpci_msix *msix;
>
> - if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
> + if ( !spin_trylock(&pdev->vpci_lock) )
> continue;
> + if ( !pdev->vpci )
> + {
> + spin_unlock(&pdev->vpci_lock);
> + continue;
> + }
>
> msi = pdev->vpci->msi;
> if ( msi && msi->enabled )
> @@ -323,7 +328,7 @@ void vpci_dump_msi(void)
> }
> }
>
> - spin_unlock(&pdev->vpci->lock);
> + spin_unlock(&pdev->vpci_lock);
> process_pending_softirqs();
> }
> }
> diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
> index 846f1b8d7038..5310cc3ff520 100644
> --- a/xen/drivers/vpci/msix.c
> +++ b/xen/drivers/vpci/msix.c
> @@ -225,7 +225,7 @@ static int msix_read(struct vcpu *v, unsigned long addr,
> unsigned int len,
I think you also need to add locking to msix_find, otherwise it will
dereference pdev->vpci without holding the vpci_lock.
It might be a better approach to rename msix_find to msix_get and
return the vpci_msix struct with the vpci_lock taken, so we can assert
it's not going to disappear under our feet. Then you will also need to
add a msix_put function that releases the lock.
> return X86EMUL_OKAY;
> }
>
> - spin_lock(&msix->pdev->vpci->lock);
> + spin_lock(&msix->pdev->vpci_lock);
> entry = get_entry(msix, addr);
> offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
>
> @@ -254,7 +254,7 @@ static int msix_read(struct vcpu *v, unsigned long addr,
> unsigned int len,
> ASSERT_UNREACHABLE();
> break;
> }
> - spin_unlock(&msix->pdev->vpci->lock);
> + spin_unlock(&msix->pdev->vpci_lock);
>
> return X86EMUL_OKAY;
> }
> @@ -297,7 +297,7 @@ static int msix_write(struct vcpu *v, unsigned long addr,
> unsigned int len,
> return X86EMUL_OKAY;
> }
>
> - spin_lock(&msix->pdev->vpci->lock);
> + spin_lock(&msix->pdev->vpci_lock);
> entry = get_entry(msix, addr);
> offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
>
> @@ -370,7 +370,7 @@ static int msix_write(struct vcpu *v, unsigned long addr,
> unsigned int len,
> ASSERT_UNREACHABLE();
> break;
> }
> - spin_unlock(&msix->pdev->vpci->lock);
> + spin_unlock(&msix->pdev->vpci_lock);
>
> return X86EMUL_OKAY;
> }
> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> index fb0947179b79..c015a4d77540 100644
> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -35,12 +35,10 @@ extern vpci_register_init_t *const __start_vpci_array[];
> extern vpci_register_init_t *const __end_vpci_array[];
> #define NUM_VPCI_INIT (__end_vpci_array - __start_vpci_array)
>
> -void vpci_remove_device(struct pci_dev *pdev)
> +static void vpci_remove_device_locked(struct pci_dev *pdev)
Nit: since it's a static function you can drop the vpci_ prefix here.
Thanks, Roger.