This updates a diff I wrote a few years ago that moves the vioblk
handling in vmd into a separate thread.

Basically, disk I/O in your virtual machine should no longer block the
vCPU from running.

I'm just throwing this out so people can give it a go and kick it around.

Index: Makefile
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/Makefile,v
retrieving revision 1.28
diff -u -p -r1.28 Makefile
--- Makefile    10 Nov 2022 11:46:39 -0000      1.28
+++ Makefile    11 Nov 2022 15:51:50 -0000
@@ -5,7 +5,7 @@
 PROG=          vmd
 SRCS=          vmd.c control.c log.c priv.c proc.c config.c vmm.c
 SRCS+=         vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
-SRCS+=         ns8250.c i8253.c dhcp.c packet.c mmio.c
+SRCS+=         ns8250.c i8253.c dhcp.c packet.c mmio.c task.c
 SRCS+=         parse.y atomicio.c vioscsi.c vioraw.c vioqcow2.c fw_cfg.c
 SRCS+=         vm_agentx.c
 
Index: task.c
===================================================================
RCS file: task.c
diff -N task.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ task.c      11 Nov 2022 15:51:50 -0000
@@ -0,0 +1,158 @@
+/*     $OpenBSD: task.c,v 1.2 2018/06/19 17:12:34 reyk Exp $ */
+
+/*
+ * Copyright (c) 2017 David Gwynne <d...@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "task.h"
+
+#define ISSET(_v, _m)  ((_v) & (_m))
+#define SET(_v, _m)    ((_v) |= (_m))
+#define CLR(_v, _m)    ((_v) &= ~(_m))
+
+struct taskq {
+       pthread_t                 thread;       /* worker running taskq_run() */
+       struct task_list          list;         /* pending tasks, run in FIFO order */
+       pthread_mutex_t           mtx;          /* guards list and task flags */
+       pthread_cond_t            cv;           /* signalled when a task is added */
+};
+
+#define TASK_ONQUEUE           (1 << 0)
+
+static void *taskq_run(void *);
+
+struct taskq *         /* new queue with its worker started; NULL on failure */
+taskq_create(const char *name)
+{
+       struct taskq *tq;
+       int error;
+
+       tq = malloc(sizeof(*tq));
+       if (tq == NULL)
+               return (NULL);
+
+       TAILQ_INIT(&tq->list);
+
+       error = pthread_mutex_init(&tq->mtx, NULL);
+       if (error != 0)
+               goto free;
+
+       error = pthread_cond_init(&tq->cv, NULL);
+       if (error != 0)
+               goto mtx;
+
+       error = pthread_create(&tq->thread, NULL, taskq_run, tq);
+       if (error != 0)
+               goto cv;
+
+       pthread_set_name_np(tq->thread, name); /* label the worker thread */
+
+       return (tq);
+
+cv:    /* unwind in reverse order of setup */
+       pthread_cond_destroy(&tq->cv);
+mtx:
+       pthread_mutex_destroy(&tq->mtx); /* can this really fail? */
+free:
+       free(tq);
+
+       errno = error; /* pthread calls return the error code directly */
+       return (NULL);
+}
+
+static void *          /* worker loop: run queued tasks one at a time */
+taskq_run(void *tqarg)
+{
+       struct taskq *tq = tqarg;
+       struct task *t;
+
+       void (*t_func)(void *);
+       void *t_arg;
+
+       for (;;) {
+               pthread_mutex_lock(&tq->mtx);
+               while ((t = TAILQ_FIRST(&tq->list)) == NULL) /* sleep if empty */
+                       pthread_cond_wait(&tq->cv, &tq->mtx);
+
+               TAILQ_REMOVE(&tq->list, t, t_entry);
+               CLR(t->t_flags, TASK_ONQUEUE); /* task_add() may requeue it now */
+
+               t_func = t->t_func; /* copy out: t is not read after unlock */
+               t_arg = t->t_arg;
+
+               pthread_mutex_unlock(&tq->mtx);
+
+               (*t_func)(t_arg); /* callback runs without mtx held */
+       }
+
+       return (NULL); /* not reached: the loop above has no exit */
+}
+
+void                   /* initialise t to call fn(arg); it starts off-queue */
+task_set(struct task *t, void (*fn)(void *), void *arg)
+{
+       t->t_func = fn;
+       t->t_arg = arg;
+       t->t_flags = 0; /* clears TASK_ONQUEUE */
+}
+
+int                    /* 1 if t was queued, 0 if it was already pending */
+task_add(struct taskq *tq, struct task *t)
+{
+       int rv = 1;
+
+       if (ISSET(t->t_flags, TASK_ONQUEUE)) /* early-out without the lock */
+               return (0);
+
+       pthread_mutex_lock(&tq->mtx);
+       if (ISSET(t->t_flags, TASK_ONQUEUE)) /* recheck under the lock */
+               rv = 0;
+       else {
+               SET(t->t_flags, TASK_ONQUEUE);
+               TAILQ_INSERT_TAIL(&tq->list, t, t_entry);
+               pthread_cond_signal(&tq->cv); /* wake the worker if it waits */
+       }
+       pthread_mutex_unlock(&tq->mtx);
+
+       return (rv);
+}
+
+int                    /* 1 if t was taken off tq, 0 if it was not queued */
+task_del(struct taskq *tq, struct task *t)
+{
+       int rv = 1;
+
+       if (!ISSET(t->t_flags, TASK_ONQUEUE)) /* early-out without the lock */
+               return (0);
+
+       pthread_mutex_lock(&tq->mtx);
+       if (!ISSET(t->t_flags, TASK_ONQUEUE)) /* recheck under the lock */
+               rv = 0;
+       else {
+               TAILQ_REMOVE(&tq->list, t, t_entry);
+               CLR(t->t_flags, TASK_ONQUEUE);
+       }
+       pthread_mutex_unlock(&tq->mtx);
+
+       return (rv); /* a callback already picked up by the worker is not waited for */
+}
Index: task.h
===================================================================
RCS file: task.h
diff -N task.h
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ task.h      11 Nov 2022 15:51:50 -0000
@@ -0,0 +1,43 @@
+/*     $OpenBSD: task.h,v 1.1 2017/09/15 02:39:33 dlg Exp $ */
+
+/*
+ * Copyright (c) 2013 David Gwynne <d...@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _TASK_H_
+#define _TASK_H_
+
+#include <sys/queue.h>
+
+struct taskq;
+
+struct task {
+       TAILQ_ENTRY(task) t_entry;              /* linkage on a taskq list */
+       void            (*t_func)(void *);      /* callback run by the worker */
+       void            *t_arg;                 /* argument passed to t_func */
+       unsigned int    t_flags;                /* queued state, kept by task.c */
+};
+
+TAILQ_HEAD(task_list, task);
+/* Static initializer: unlinked entry, callback _f, argument _a, no flags. */
+#define TASK_INITIALIZER(_f, _a)  {{ NULL, NULL }, (_f), (_a), 0 }
+/* Create a queue and start its worker thread; the string names the thread. */
+struct taskq   *taskq_create(const char *);
+/* task_add()/task_del() return 1 if the queued state changed, 0 if not. */
+void            task_set(struct task *, void (*)(void *), void *);
+int             task_add(struct taskq *, struct task *);
+int             task_del(struct taskq *, struct task *);
+
+#endif /* _TASK_H_ */
Index: virtio.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v
retrieving revision 1.97
diff -u -p -r1.97 virtio.c
--- virtio.c    29 Aug 2021 18:01:32 -0000      1.97
+++ virtio.c    11 Nov 2022 15:51:51 -0000
@@ -46,6 +46,7 @@
 #include "virtio.h"
 #include "vmd.h"
 #include "vmm.h"
+#include "task.h"
 
 extern char *__progname;
 struct viornd_dev viornd;
@@ -54,6 +55,8 @@ struct vionet_dev *vionet;
 struct vioscsi_dev *vioscsi;
 struct vmmci_dev vmmci;
 
+struct taskq *iotq;
+
 int nr_vionet;
 int nr_vioblk;
 
@@ -294,11 +297,17 @@ virtio_rnd_io(int dir, uint16_t reg, uin
 void
 vioblk_update_qa(struct vioblk_dev *dev)
 {
+       struct vioblk_queue *vbq;
+
        /* Invalid queue? */
        if (dev->cfg.queue_select > 0)
                return;
 
-       dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
+       vbq = &dev->vbq[dev->cfg.queue_select];
+       vbq->vq.qa = dev->cfg.queue_address;
+       /* Scale the PFN in 64 bits; a narrower multiply wraps at >= 4GB. */
+       vbq->ring = vaddr_mem((uint64_t)dev->cfg.queue_address *
+           VIRTIO_PAGE_SIZE, vring_size(VIOBLK_QUEUE_SIZE));
 }
 
 void
@@ -311,8 +320,8 @@ vioblk_update_qs(struct vioblk_dev *dev)
        }
 
        /* Update queue address/size based on queue select */
-       dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
-       dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
+       dev->cfg.queue_address = dev->vbq[dev->cfg.queue_select].vq.qa;
+       dev->cfg.queue_size = dev->vbq[dev->cfg.queue_select].vq.qs;
 }
 
 static void
@@ -421,77 +430,59 @@ vioblk_finish_write(struct ioinfo *info)
 /*
  * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can
  */
-int
-vioblk_notifyq(struct vioblk_dev *dev)
+void                   /* taskq callback: arg is the struct vioblk_queue to service */
+vioblk_notifyq(void *arg)
 {
-       uint64_t q_gpa;
-       uint32_t vr_sz;
-       uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
-       uint8_t ds;
-       int cnt, ret;
+       struct vioblk_queue *vbq = arg;
+       struct vioblk_dev *dev = vbq->dev;
+       struct virtio_vq_info *vq = &vbq->vq;
+       uint16_t cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
        off_t secbias;
        char *vr;
        struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
        struct vring_avail *avail;
        struct vring_used *used;
        struct virtio_blk_req_hdr cmd;
+       unsigned int prod, cons, cnt;
+       uint8_t ds;
 
-       ret = 0;
-
-       /* Invalid queue? */
-       if (dev->cfg.queue_notify > 0)
-               return (0);
-
-       vr_sz = vring_size(VIOBLK_QUEUE_SIZE);
-       q_gpa = dev->vq[dev->cfg.queue_notify].qa;
-       q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
-
-       vr = calloc(1, vr_sz);
+       vr = vbq->ring; /* ring pointer stored by vioblk_update_qa() */
        if (vr == NULL) {
-               log_warn("calloc error getting vioblk ring");
-               return (0);
-       }
-
-       if (read_mem(q_gpa, vr, vr_sz)) {
-               log_warnx("error reading gpa 0x%llx", q_gpa);
-               goto out;
+               log_warnx("%s: ring is not initialized", __func__);
+               return;
        }
 
        /* Compute offsets in ring of descriptors, avail ring, and used ring */
        desc = (struct vring_desc *)(vr);
-       avail = (struct vring_avail *)(vr +
-           dev->vq[dev->cfg.queue_notify].vq_availoffset);
-       used = (struct vring_used *)(vr +
-           dev->vq[dev->cfg.queue_notify].vq_usedoffset);
+       avail = (struct vring_avail *)(vr + vq->vq_availoffset);
+       used = (struct vring_used *)(vr + vq->vq_usedoffset);
 
-       idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK;
+       cons = vq->last_avail & VIOBLK_QUEUE_MASK;
+       prod = avail->idx & VIOBLK_QUEUE_MASK;
 
-       if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) {
-               log_warnx("vioblk queue notify - nothing to do?");
-               goto out;
-       }
-
-       while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) {
+       if (cons == prod)
+               return;
 
-               cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK;
+       do {
+               cmd_desc_idx = avail->ring[cons] & VIOBLK_QUEUE_MASK;
                cmd_desc = &desc[cmd_desc_idx];
 
                if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) {
                        log_warnx("unchained vioblk cmd descriptor received "
                            "(idx %d)", cmd_desc_idx);
-                       goto out;
+                       break;
                }
 
                /* Read command from descriptor ring */
                if (cmd_desc->flags & VRING_DESC_F_WRITE) {
                        log_warnx("vioblk: unexpected writable cmd descriptor "
                            "%d", cmd_desc_idx);
-                       goto out;
+                       break; /* not return: out must still save cons */
                }
                if (read_mem(cmd_desc->addr, &cmd, sizeof(cmd))) {
                        log_warnx("vioblk: command read_mem error @ 0x%llx",
                            cmd_desc->addr);
-                       goto out;
+                       break;
                }
 
                switch (cmd.type) {
@@ -565,6 +556,7 @@ vioblk_notifyq(struct vioblk_dev *dev)
                        ds_desc = secdata_desc;
 
                        ds = VIRTIO_BLK_S_OK;
+
                        break;
                case VIRTIO_BLK_T_OUT:
                        secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
@@ -691,22 +683,20 @@ vioblk_notifyq(struct vioblk_dev *dev)
                        goto out;
                }
 
-               ret = 1;
                dev->cfg.isr_status = 1;
                used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
                used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
+               __sync_synchronize(); /* barrier between the entry and idx */
                used->idx++;
 
-               dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
-                   VIOBLK_QUEUE_MASK;
-               if (write_mem(q_gpa, vr, vr_sz))
-                       log_warnx("%s: error writing vio ring", __func__);
+               cons++;
+               cons &= VIOBLK_QUEUE_MASK;
+       } while (cons != prod);
 
-               idx = (idx + 1) & VIOBLK_QUEUE_MASK;
-       }
 out:
-       free(vr);
-       return (ret);
+       vq->last_avail = cons; /* next pass resumes at the first unhandled entry */
+       dev->cfg.isr_status = 1;
+       vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq);
 }
 
 int
@@ -739,8 +729,8 @@ virtio_blk_io(int dir, uint16_t reg, uin
                        break;
                case VIRTIO_CONFIG_QUEUE_NOTIFY:
                        dev->cfg.queue_notify = *data;
-                       if (vioblk_notifyq(dev))
-                               *intr = 1;
+                       task_add(iotq, &dev->vbq[0].t);
+                       *intr = 1;
                        break;
                case VIRTIO_CONFIG_DEVICE_STATUS:
                        dev->cfg.device_status = *data;
@@ -754,7 +744,7 @@ virtio_blk_io(int dir, uint16_t reg, uin
                                dev->cfg.queue_select = 0;
                                dev->cfg.queue_notify = 0;
                                dev->cfg.isr_status = 0;
-                               dev->vq[0].last_avail = 0;
+                               dev->vbq[0].vq.last_avail = 0;
                                vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq);
                        }
                        break;
@@ -1897,8 +1887,14 @@ virtio_init(struct vmd_vm *vm, int child
                        return;
                }
 
+               iotq = taskq_create("vioblk");
+               if (iotq == NULL)
+                       fatalx("unable to create vioblk taskq");
+
                /* One virtio block device for each disk defined in vcp */
                for (i = 0; i < vcp->vcp_ndisks; i++) {
+                       struct virtio_vq_info *vq;
+
                        if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
                            PCI_PRODUCT_QUMRANET_VIO_BLOCK,
                            PCI_CLASS_MASS_STORAGE,
@@ -1915,13 +1911,16 @@ virtio_init(struct vmd_vm *vm, int child
                                    "device", __progname);
                                return;
                        }
-                       vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE;
-                       vioblk[i].vq[0].vq_availoffset =
+
+                       vq = &vioblk[i].vbq[0].vq;
+                       vq->qs = VIOBLK_QUEUE_SIZE;
+                       vq->vq_availoffset =
                            sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
-                       vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
+                       vq->vq_usedoffset = VIRTQUEUE_ALIGN(
                            sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
                            + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
-                       vioblk[i].vq[0].last_avail = 0;
+                       vq->last_avail = 0;
+
                        vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX;
                        vioblk[i].max_xfer = 1048576;
                        vioblk[i].pci_id = id;
@@ -1935,6 +1934,10 @@ virtio_init(struct vmd_vm *vm, int child
                                return;
                        }
                        vioblk[i].sz /= 512;
+
+                       vioblk[i].vbq[0].dev = &vioblk[i];
+                       task_set(&vioblk[i].vbq[0].t,
+                           vioblk_notifyq, &vioblk[i].vbq[0]);
                }
        }
 
Index: virtio.h
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v
retrieving revision 1.42
diff -u -p -r1.42 virtio.h
--- virtio.h    4 May 2022 23:17:25 -0000       1.42
+++ virtio.h    11 Nov 2022 15:51:51 -0000
@@ -24,6 +24,7 @@
 #include <event.h>
 
 #include "vmd.h"
+#include "task.h"
 
 #ifndef _VIRTIO_H_
 #define _VIRTIO_H_
@@ -167,10 +168,22 @@ struct viornd_dev {
        uint32_t vm_id;
 };
 
+/*
+ * vioblk
+ */
+
+struct vioblk_queue {
+       struct vioblk_dev       *dev;   /* owning device, set in virtio_init() */
+       void                    *ring;  /* vaddr_mem() result for the vring, or NULL */
+       struct virtio_vq_info    vq;    /* queue address, size, offsets, last_avail */
+       struct task              t;     /* runs vioblk_notifyq() for this queue */
+       struct event             ev;    /* NOTE(review): unused in this diff? */
+};
+
 struct vioblk_dev {
        struct virtio_io_cfg cfg;
 
-       struct virtio_vq_info vq[VIRTIO_MAX_QUEUES];
+       struct vioblk_queue vbq[VIRTIO_MAX_QUEUES];
        struct virtio_backing file;
 
        uint64_t sz;
@@ -181,7 +194,8 @@ struct vioblk_dev {
        uint32_t vm_id;
 };
 
-/* vioscsi will use at least 3 queues - 5.6.2 Virtqueues
+/*
+ * vioscsi will use at least 3 queues - 5.6.2 Virtqueues
  * Current implementation will use 3
  * 0 - control
  * 1 - event
@@ -301,7 +315,7 @@ int vioblk_restore(int, struct vmop_crea
     int[][VM_MAX_BASE_PER_DISK]);
 void vioblk_update_qs(struct vioblk_dev *);
 void vioblk_update_qa(struct vioblk_dev *);
-int vioblk_notifyq(struct vioblk_dev *);
+void vioblk_notifyq(void *);
 
 int virtio_net_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
 int vionet_dump(int);
Index: vm.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
retrieving revision 1.76
diff -u -p -r1.76 vm.c
--- vm.c        11 Nov 2022 10:52:44 -0000      1.76
+++ vm.c        11 Nov 2022 15:51:51 -0000
@@ -1990,6 +1990,31 @@ read_mem(paddr_t src, void *buf, size_t 
        return (0);
 }
 
+void *                 /* host pointer for guest range [src, src + len), or NULL */
+vaddr_mem(paddr_t src, size_t len)
+{
+       struct vm_mem_range *vmr;
+       size_t off;
+
+       vmr = find_gpa_range(&current_vm->vm_params.vmc_params, src, len);
+       if (vmr == NULL) { /* lookup found no range for this request */
+               errno = EINVAL;
+               log_warn("%s: failed - invalid memory range src = 0x%lx, "
+                   "len = 0x%zx", __func__, src, len);
+               return (NULL);
+       }
+
+       off = src - vmr->vmr_gpa; /* offset of src within the range */
+       if (len > (vmr->vmr_size - off)) { /* request runs past the range end */
+               errno = ENOMEM;
+               log_warn("%s: failed - invalid memory range src = 0x%lx, "
+                   "len = 0x%zx", __func__, src, len);
+               return (NULL);
+       }
+
+       return ((char *)vmr->vmr_va + off); /* same offset from the range's va */
+}
+
 /*
  * vcpu_assert_pic_irq
  *
Index: vmd.h
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vmd.h,v
retrieving revision 1.111
diff -u -p -r1.111 vmd.h
--- vmd.h       31 Oct 2022 14:02:11 -0000      1.111
+++ vmd.h       11 Nov 2022 15:51:51 -0000
@@ -454,6 +454,7 @@ int  vmm_pipe(struct vmd_vm *, int, void
 void    mutex_lock(pthread_mutex_t *);
 void    mutex_unlock(pthread_mutex_t *);
 int     read_mem(paddr_t, void *buf, size_t);
+void   *vaddr_mem(paddr_t, size_t);
 int     start_vm(struct vmd_vm *, int);
 __dead void vm_shutdown(unsigned int);
 void    vm_pipe_init(struct vm_dev_pipe *, void (*)(int, short, void *));

Reply via email to