High level looks good but I have some questions. Rather than a new boolean I'd rather we re-used started_vu by changing it to an enum and having different values for starting and started.
On Tue, Mar 29, 2022 at 12:15:46AM +0800, Jie Wang wrote: > During Virtio1.0 dev(start_on_kick) in vhost_user_blk_start process, > if spdk abnormal after vhost_dev_enable_notifiers, then vhost_user_blk_start > will > goto vhost_dev_disable_notifiers and reenter vhost_user_blk_start, and > add ioeventfd again. > > func call Process as follows: > vhost_user_blk_start(spdk abnormal after vhost_dev_enable_notifiers) > ->vhost_dev_disable_notifiers > ->virtio_bus_cleanup_host_notifier > ->virtio_queue_host_notifier_read > ->virtio_queue_notify_vq > ->vhost_user_blk_handle_output > ->vhost_user_blk_start > ->vhost_dev_enable_notifiers > > then kvm_mem_ioeventfd_add will failed with errno17(File exists) and > abort(). > > The GDB stack is as follows: > (gdb) bt > 0 0x00007fca4264c81b in raise () from /usr/lib64/libc.so.6 > 1 0x00007fca4264db41 in abort () from /usr/lib64/libc.so.6 > 2 0x00007fca423ebe8b in kvm_mem_ioeventfd_add > 3 0x00007fca4241c816 in address_space_add_del_ioeventfds > 4 0x00007fca4241ddc6 in address_space_update_ioeventfds > 5 0x00007fca424203d5 in memory_region_commit () > 6 0x00007fca424204e5 in memory_region_transaction_commit () > 7 0x00007fca42421861 in memory_region_add_eventfd > 8 0x00007fca42917a4c in virtio_pci_ioeventfd_assign > 9 0x00007fca41054178 in virtio_bus_set_host_notifier > 10 0x00007fca41058729 in vhost_dev_enable_notifiers > 11 0x00007fca40fdec1e in vhost_user_blk_start > 12 0x00007fca40fdefa8 in vhost_user_blk_handle_output > 13 0x00007fca4104e135 in virtio_queue_notify_vq > 14 0x00007fca4104f192 in virtio_queue_host_notifier_read > 15 0x00007fca41054054 in virtio_bus_cleanup_host_notifier > 16 0x00007fca41058916 in vhost_dev_disable_notifiers > 17 0x00007fca40fdede0 in vhost_user_blk_start > 18 0x00007fca40fdefa8 in vhost_user_blk_handle_output > 19 0x00007fca41050a6d in virtio_queue_notify > 20 0x00007fca4241bbae in memory_region_write_accessor > 21 0x00007fca4241ab1d in access_with_adjusted_size > 22 0x00007fca4241e466 in memory_region_dispatch_write > 23 0x00007fca4242da36 in flatview_write_continue > 24 0x00007fca4242db75 in flatview_write > 25 0x00007fca42430beb in address_space_write > 26 0x00007fca42430c25 in address_space_rw > 27 0x00007fca423e8ecc in kvm_handle_io > 28 0x00007fca423ecb48 in kvm_cpu_exec > 29 0x00007fca424279d5 in qemu_kvm_cpu_thread_fn > 30 0x00007fca423c9480 in qemu_thread_start > 31 0x00007fca4257ff3b in ?? () from /usr/lib64/libpthread.so.0 > 32 0x00007fca4270b550 in clone () from /usr/lib64/libc.so.6 > > Signed-off-by: Jie Wang <[email protected]> > --- > hw/block/vhost-user-blk.c | 12 +++++++++++- > include/hw/virtio/vhost-user-blk.h | 2 ++ > 2 files changed, 13 insertions(+), 1 deletion(-) > > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c > index 1a42ae9187..2182769676 100644 > --- a/hw/block/vhost-user-blk.c > +++ b/hw/block/vhost-user-blk.c > @@ -124,6 +124,13 @@ static int vhost_user_blk_start(VirtIODevice *vdev, > Error **errp) > VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > int i, ret; > I would like a comment here explaining the check. Also wouldn't you want to set starting irrespective of whether start_on_kick is set? > + if (vdev->start_on_kick) { > + if (s->starting) { > + return 0; > + } > + s->starting = true; > + } > + > if (!k->set_guest_notifiers) { > error_setg(errp, "binding does not support guest notifiers"); > return -ENOSYS; > @@ -178,6 +185,8 @@ static int vhost_user_blk_start(VirtIODevice *vdev, Error > **errp) > vhost_virtqueue_mask(&s->dev, vdev, i, false); > } > > + s->starting = false; > + > return ret; > > err_guest_notifiers: > @@ -344,7 +353,7 @@ static int vhost_user_blk_connect(DeviceState *dev, Error > **errp) > } > > /* restore vhost state */ Can you explain the case where we could enter this path? If the device is starting and there is a full reconnect, why would we want to enter vhost_user_blk_start() again? Seems like allowing it to enter vhost_user_blk_start could cause the same issue? > - if (virtio_device_started(vdev, vdev->status)) { > + if (s->starting || virtio_device_started(vdev, vdev->status)) { > ret = vhost_user_blk_start(vdev, errp); > if (ret < 0) { > return ret; > @@ -500,6 +509,7 @@ static void vhost_user_blk_device_realize(DeviceState > *dev, Error **errp) > vhost_user_blk_handle_output); > } > > + s->starting = false; > s->inflight = g_new0(struct vhost_inflight, 1); > s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues); > > diff --git a/include/hw/virtio/vhost-user-blk.h > b/include/hw/virtio/vhost-user-blk.h > index 7c91f15040..6e67f36962 100644 > --- a/include/hw/virtio/vhost-user-blk.h > +++ b/include/hw/virtio/vhost-user-blk.h > @@ -51,6 +51,8 @@ struct VHostUserBlk { > bool connected; > /* vhost_user_blk_start/vhost_user_blk_stop */ > bool started_vu; > + NIT: Spurious newline > + bool starting; > }; > > #endif > -- > 2.23.0 >
