blk_root_drained_end is not thread-safe either. I started looking at that with
https://www.mail-archive.com/[email protected]/msg925670.html; that's
certainly a prerequisite for this patch to be a full fix, but I have not
checked if it's enough because I don't have the QEMU sources at hand right
now.

Paolo

Il mer 4 gen 2023, 20:56 Stefan Hajnoczi <[email protected]> ha scritto:

> scsi_device_purge_requests() is called from I/O code by virtio-scsi TMF
> emulation code. It must not call Global State APIs like blk_drain()
> because that results in an assertion failure.
>
> blk_drain() is a Global State API because it uses bdrv_unref(). Actually
> ref/unref is unnecessary in device emulation code because the drive=
> qdev property holds the reference.
>
> Introduce blk_drain_noref(), a variant of blk_drain() that is
> IO_OR_GS_CODE() and doesn't take a reference to the BlockBackend.
>
> This fixes the following virtio-scsi IOThread assertion failure when
> hot-plugging scsi-hd devices:
>
>   qemu-kvm: ../block/block-backend.c:1780: void blk_drain(BlockBackend *):
> Assertion `qemu_in_main_thread()' failed.
>
>   #0  0x00007f4b6a0a154c __pthread_kill_implementation (libc.so.6>
>   #1  0x00007f4b6a054d46 raise (libc.so.6 + 0x54d46)
>   #2  0x00007f4b6a0287f3 abort (libc.so.6 + 0x287f3)
>   #3  0x00007f4b6a02871b __assert_fail_base.cold (libc.so.6 + 0x2>
>   #4  0x00007f4b6a04dce6 __assert_fail (libc.so.6 + 0x4dce6)
>   #5  0x000055c811253ac0 blk_drain (qemu-kvm + 0x808ac0)
>   #6  0x000055c810ecb951 scsi_device_purge_requests (qemu-kvm + 0>
>                          ^^^^^^^^^^^^^^^^^^^^^^^^^^
>   #7  0x000055c810ed6ab8 scsi_disk_reset (qemu-kvm + 0x48bab8)
>   #8  0x000055c8111dc908 resettable_phase_hold (qemu-kvm + 0x7919>
>   #9  0x000055c8111db7f0 device_cold_reset (qemu-kvm + 0x7907f0)
>   #10 0x000055c8110bb1a6 virtio_scsi_handle_ctrl (qemu-kvm + 0x67>
>   #11 0x000055c8110d458f virtio_queue_host_notifier_read (qemu-kv>
>   #12 0x000055c811404a73 aio_dispatch_handler (qemu-kvm + 0x9b9a7>
>   #13 0x000055c811405743 aio_poll (qemu-kvm + 0x9ba743)
>   #14 0x000055c81121d532 iothread_run (qemu-kvm + 0x7d2532)
>   #15 0x000055c81140926a qemu_thread_start (qemu-kvm + 0x9be26a)
>   #16 0x00007f4b6a09f802 start_thread (libc.so.6 + 0x9f802)
>   #17 0x00007f4b6a03f450 __clone3 (libc.so.6 + 0x3f450)
>
> Reported-by: Qing Wang <[email protected]>
> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2155748
> Cc: Paolo Bonzini <[email protected]>
> Cc: Kevin Wolf <[email protected]>
> Cc: Emanuele Giuseppe Esposito <[email protected]>
> Signed-off-by: Stefan Hajnoczi <[email protected]>
> ---
>  include/sysemu/block-backend-io.h |  1 +
>  block/block-backend.c             | 23 +++++++++++++++++++++++
>  hw/scsi/scsi-bus.c                |  2 +-
>  3 files changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
> index 7ec6d978d4..f73db1f77c 100644
> --- a/include/sysemu/block-backend-io.h
> +++ b/include/sysemu/block-backend-io.h
> @@ -73,6 +73,7 @@ void blk_iostatus_set_err(BlockBackend *blk, int error);
>  int blk_get_max_iov(BlockBackend *blk);
>  int blk_get_max_hw_iov(BlockBackend *blk);
>
> +void blk_drain_noref(BlockBackend *blk);
>  void blk_io_plug(BlockBackend *blk);
>  void blk_io_unplug(BlockBackend *blk);
>  AioContext *blk_get_aio_context(BlockBackend *blk);
> diff --git a/block/block-backend.c b/block/block-backend.c
> index ba7bf1d6bc..20914bae7b 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1815,6 +1815,29 @@ void blk_drain(BlockBackend *blk)
>      }
>  }
>
> +/*
> + * Same as blk_drain() but the caller must hold a reference to blk. May be
> + * called from Global State or I/O API code. Device emulation can call this
> + * because the qdev drive= property holds the reference.
> + */
> +void blk_drain_noref(BlockBackend *blk)
> +{
> +    BlockDriverState *bs = blk_bs(blk);
> +    IO_OR_GS_CODE();
> +
> +    if (bs) {
> +        bdrv_drained_begin(bs);
> +    }
> +
> +    /* We may have -ENOMEDIUM completions in flight */
> +    AIO_WAIT_WHILE(blk_get_aio_context(blk),
> +                   qatomic_mb_read(&blk->in_flight) > 0);
> +
> +    if (bs) {
> +        bdrv_drained_end(bs);
> +    }
> +}
> +
>  void blk_drain_all(void)
>  {
>      BlockBackend *blk = NULL;
> diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
> index ceceafb2cd..110fd85106 100644
> --- a/hw/scsi/scsi-bus.c
> +++ b/hw/scsi/scsi-bus.c
> @@ -1663,7 +1663,7 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
>          req = QTAILQ_FIRST(&sdev->requests);
>          scsi_req_cancel_async(req, NULL);
>      }
> -    blk_drain(sdev->conf.blk);
> +    blk_drain_noref(sdev->conf.blk);
>      aio_context_release(blk_get_aio_context(sdev->conf.blk));
>      scsi_device_set_ua(sdev, sense);
>  }
> --
> 2.39.0
>
>

Reply via email to