This updates a diff I had from a few years ago to move the vioblk
handling in vmd into a separate thread.
Basically, disk I/O in your virtual machine should no longer block the
vcpu from running.
Just throwing this out so people can give it a go and kick it around.
Index: Makefile
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/Makefile,v
retrieving revision 1.28
diff -u -p -r1.28 Makefile
--- Makefile 10 Nov 2022 11:46:39 -0000 1.28
+++ Makefile 11 Nov 2022 15:51:50 -0000
@@ -5,7 +5,7 @@
PROG= vmd
SRCS= vmd.c control.c log.c priv.c proc.c config.c vmm.c
SRCS+= vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
-SRCS+= ns8250.c i8253.c dhcp.c packet.c mmio.c
+SRCS+= ns8250.c i8253.c dhcp.c packet.c mmio.c task.c
SRCS+= parse.y atomicio.c vioscsi.c vioraw.c vioqcow2.c fw_cfg.c
SRCS+= vm_agentx.c
Index: task.c
===================================================================
RCS file: task.c
diff -N task.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ task.c 11 Nov 2022 15:51:50 -0000
@@ -0,0 +1,158 @@
+/* $OpenBSD: task.c,v 1.2 2018/06/19 17:12:34 reyk Exp $ */
+
+/*
+ * Copyright (c) 2017 David Gwynne <d...@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <pthread_np.h>
+
+#include "task.h"
+
+#define ISSET(_v, _m) ((_v) & (_m)) /* non-zero if any bit of _m is set in _v */
+#define SET(_v, _m) ((_v) |= (_m)) /* turn on the bits of _m in _v */
+#define CLR(_v, _m) ((_v) &= ~(_m)) /* turn off the bits of _m in _v */
+
+struct taskq { /* a list of pending tasks serviced by one worker thread */
+ pthread_t thread; /* worker started by taskq_create() */
+ struct task_list list; /* pending tasks; added at tail, run from head */
+ pthread_mutex_t mtx; /* held while list and TASK_ONQUEUE are changed */
+ pthread_cond_t cv; /* signalled by task_add() after queueing a task */
+};
+
+#define TASK_ONQUEUE (1 << 0) /* t_flags bit: task is linked on a taskq list */
+
+static void *taskq_run(void *); /* worker thread start routine */
+
+struct taskq * /* returns the new task queue, or NULL on failure */
+taskq_create(const char *name) /* name: label applied to the worker thread */
+{
+ struct taskq *tq;
+ int error;
+
+ tq = malloc(sizeof(*tq));
+ if (tq == NULL)
+ return (NULL); /* errno is not assigned by this function here */
+
+ TAILQ_INIT(&tq->list);
+
+ error = pthread_mutex_init(&tq->mtx, NULL);
+ if (error != 0)
+ goto free;
+
+ error = pthread_cond_init(&tq->cv, NULL);
+ if (error != 0)
+ goto mtx;
+
+ error = pthread_create(&tq->thread, NULL, taskq_run, tq); /* start worker */
+ if (error != 0)
+ goto cv;
+
+ pthread_set_name_np(tq->thread, name);
+
+ return (tq);
+
+cv: /* failure labels unwind in reverse order of initialization */
+ pthread_cond_destroy(&tq->cv);
+mtx:
+ pthread_mutex_destroy(&tq->mtx); /* can this really fail? */
+free:
+ free(tq);
+
+ errno = error; /* hand the pthread error code back through errno */
+ return (NULL);
+}
+
+static void * /* worker loop: take the first queued task and call it, forever */
+taskq_run(void *tqarg) /* tqarg: the struct taskq given to pthread_create() */
+{
+ struct taskq *tq = tqarg;
+ struct task *t;
+
+ void (*t_func)(void *);
+ void *t_arg;
+
+ for (;;) {
+ pthread_mutex_lock(&tq->mtx);
+ while ((t = TAILQ_FIRST(&tq->list)) == NULL) /* wait while list is empty */
+ pthread_cond_wait(&tq->cv, &tq->mtx);
+
+ TAILQ_REMOVE(&tq->list, t, t_entry);
+ CLR(t->t_flags, TASK_ONQUEUE); /* task_add() will accept t again now */
+
+ t_func = t->t_func; /* copy callback and argument while locked ... */
+ t_arg = t->t_arg;
+
+ pthread_mutex_unlock(&tq->mtx);
+
+ (*t_func)(t_arg); /* ... so the callback runs with the mutex released */
+ }
+
+ return (NULL); /* not reached: the loop above has no exit */
+}
+
+void /* initialize t so that running it calls fn(arg) */
+task_set(struct task *t, void (*fn)(void *), void *arg)
+{
+ t->t_func = fn;
+ t->t_arg = arg;
+ t->t_flags = 0; /* clears TASK_ONQUEUE too; t is not unlinked from any list */
+}
+
+int /* returns 1 if t was queued on tq, 0 if it was already queued */
+task_add(struct taskq *tq, struct task *t)
+{
+ int rv = 1;
+
+ if (ISSET(t->t_flags, TASK_ONQUEUE)) /* NOTE(review): read without mtx */
+ return (0);
+
+ pthread_mutex_lock(&tq->mtx);
+ if (ISSET(t->t_flags, TASK_ONQUEUE)) /* re-check now that mtx is held */
+ rv = 0;
+ else {
+ SET(t->t_flags, TASK_ONQUEUE);
+ TAILQ_INSERT_TAIL(&tq->list, t, t_entry);
+ pthread_cond_signal(&tq->cv); /* wake the worker waiting in taskq_run() */
+ }
+ pthread_mutex_unlock(&tq->mtx);
+
+ return (rv);
+}
+
+int /* returns 1 if t was unlinked from tq, 0 if it was not queued */
+task_del(struct taskq *tq, struct task *t)
+{
+ int rv = 1;
+
+ if (!ISSET(t->t_flags, TASK_ONQUEUE)) /* NOTE(review): read without mtx */
+ return (0);
+
+ pthread_mutex_lock(&tq->mtx);
+ if (!ISSET(t->t_flags, TASK_ONQUEUE)) /* re-check now that mtx is held */
+ rv = 0;
+ else {
+ TAILQ_REMOVE(&tq->list, t, t_entry);
+ CLR(t->t_flags, TASK_ONQUEUE);
+ }
+ pthread_mutex_unlock(&tq->mtx);
+
+ return (rv); /* 0 can also mean taskq_run() already dequeued t to run it */
+}
Index: task.h
===================================================================
RCS file: task.h
diff -N task.h
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ task.h 11 Nov 2022 15:51:50 -0000
@@ -0,0 +1,43 @@
+/* $OpenBSD: task.h,v 1.1 2017/09/15 02:39:33 dlg Exp $ */
+
+/*
+ * Copyright (c) 2013 David Gwynne <d...@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _TASK_H_
+#define _TASK_H_
+
+#include <sys/queue.h>
+
+struct taskq;
+
+struct task { /* one unit of deferred work: a callback plus its argument */
+ TAILQ_ENTRY(task) t_entry; /* linkage on a struct task_list */
+ void (*t_func)(void *); /* callback to invoke */
+ void *t_arg; /* argument passed to t_func */
+ unsigned int t_flags; /* state bits; zeroed by task_set() */
+};
+
+TAILQ_HEAD(task_list, task); /* struct task_list: tail queue of struct task */
+/* static initializer: NULL list linkage, callback _f, argument _a, no flags */
+#define TASK_INITIALIZER(_f, _a) {{ NULL, NULL }, (_f), (_a), 0 }
+
+struct taskq *taskq_create(const char *); /* NULL on failure */
+
+void task_set(struct task *, void (*)(void *), void *);
+int task_add(struct taskq *, struct task *); /* 1 if queued, else 0 */
+int task_del(struct taskq *, struct task *); /* 1 if removed, else 0 */
+
+#endif /* _TASK_H_ */
Index: virtio.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v
retrieving revision 1.97
diff -u -p -r1.97 virtio.c
--- virtio.c 29 Aug 2021 18:01:32 -0000 1.97
+++ virtio.c 11 Nov 2022 15:51:51 -0000
@@ -46,6 +46,7 @@
#include "virtio.h"
#include "vmd.h"
#include "vmm.h"
+#include "task.h"
extern char *__progname;
struct viornd_dev viornd;
@@ -54,6 +55,8 @@ struct vionet_dev *vionet;
struct vioscsi_dev *vioscsi;
struct vmmci_dev vmmci;
+struct taskq *iotq;
+
int nr_vionet;
int nr_vioblk;
@@ -294,11 +297,17 @@ virtio_rnd_io(int dir, uint16_t reg, uin
void
vioblk_update_qa(struct vioblk_dev *dev)
{
+ struct vioblk_queue *vbq;
+
/* Invalid queue? */
if (dev->cfg.queue_select > 0)
return;
- dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
+ vbq = &dev->vbq[dev->cfg.queue_select];
+
+ vbq->vq.qa = dev->cfg.queue_address;
+ vbq->ring = vaddr_mem(dev->cfg.queue_address * VIRTIO_PAGE_SIZE,
+ vring_size(VIOBLK_QUEUE_SIZE));
}
void
@@ -311,8 +320,8 @@ vioblk_update_qs(struct vioblk_dev *dev)
}
/* Update queue address/size based on queue select */
- dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
- dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
+ dev->cfg.queue_address = dev->vbq[dev->cfg.queue_select].vq.qa;
+ dev->cfg.queue_size = dev->vbq[dev->cfg.queue_select].vq.qs;
}
static void
@@ -421,77 +430,59 @@ vioblk_finish_write(struct ioinfo *info)
/*
* XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can
*/
-int
-vioblk_notifyq(struct vioblk_dev *dev)
+void
+vioblk_notifyq(void *arg)
{
- uint64_t q_gpa;
- uint32_t vr_sz;
- uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
- uint8_t ds;
- int cnt, ret;
+ struct vioblk_queue *vbq = arg;
+ struct vioblk_dev *dev = vbq->dev;
+ struct virtio_vq_info *vq = &vbq->vq;
+ uint16_t cmd_desc_idx, secdata_desc_idx, ds_desc_idx;
off_t secbias;
char *vr;
struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc;
struct vring_avail *avail;
struct vring_used *used;
struct virtio_blk_req_hdr cmd;
+ unsigned int prod, cons, cnt;
+ uint8_t ds;
- ret = 0;
-
- /* Invalid queue? */
- if (dev->cfg.queue_notify > 0)
- return (0);
-
- vr_sz = vring_size(VIOBLK_QUEUE_SIZE);
- q_gpa = dev->vq[dev->cfg.queue_notify].qa;
- q_gpa = q_gpa * VIRTIO_PAGE_SIZE;
-
- vr = calloc(1, vr_sz);
+ vr = vbq->ring;
if (vr == NULL) {
- log_warn("calloc error getting vioblk ring");
- return (0);
- }
-
- if (read_mem(q_gpa, vr, vr_sz)) {
- log_warnx("error reading gpa 0x%llx", q_gpa);
- goto out;
+ log_warnx("%s: ring is not initialized", __func__);
+ return;
}
/* Compute offsets in ring of descriptors, avail ring, and used ring */
desc = (struct vring_desc *)(vr);
- avail = (struct vring_avail *)(vr +
- dev->vq[dev->cfg.queue_notify].vq_availoffset);
- used = (struct vring_used *)(vr +
- dev->vq[dev->cfg.queue_notify].vq_usedoffset);
+ avail = (struct vring_avail *)(vr + vq->vq_availoffset);
+ used = (struct vring_used *)(vr + vq->vq_usedoffset);
- idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK;
+ cons = vq->last_avail & VIOBLK_QUEUE_MASK;
+ prod = avail->idx & VIOBLK_QUEUE_MASK;
- if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) {
- log_warnx("vioblk queue notify - nothing to do?");
- goto out;
- }
-
- while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) {
+ if (cons == prod)
+ return;
- cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK;
+ do {
+ cmd_desc_idx = avail->ring[cons] & VIOBLK_QUEUE_MASK;
cmd_desc = &desc[cmd_desc_idx];
if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) {
log_warnx("unchained vioblk cmd descriptor received "
"(idx %d)", cmd_desc_idx);
- goto out;
+ break;
}
/* Read command from descriptor ring */
if (cmd_desc->flags & VRING_DESC_F_WRITE) {
log_warnx("vioblk: unexpected writable cmd descriptor "
"%d", cmd_desc_idx);
- goto out;
+ return;
}
if (read_mem(cmd_desc->addr, &cmd, sizeof(cmd))) {
log_warnx("vioblk: command read_mem error @ 0x%llx",
cmd_desc->addr);
- goto out;
+ break;
}
switch (cmd.type) {
@@ -565,6 +556,7 @@ vioblk_notifyq(struct vioblk_dev *dev)
ds_desc = secdata_desc;
ds = VIRTIO_BLK_S_OK;
+
break;
case VIRTIO_BLK_T_OUT:
secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK;
@@ -691,22 +683,20 @@ vioblk_notifyq(struct vioblk_dev *dev)
goto out;
}
- ret = 1;
dev->cfg.isr_status = 1;
used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len;
+ __sync_synchronize();
used->idx++;
- dev->vq[dev->cfg.queue_notify].last_avail = avail->idx &
- VIOBLK_QUEUE_MASK;
- if (write_mem(q_gpa, vr, vr_sz))
- log_warnx("%s: error writing vio ring", __func__);
+ cons++;
+ cons &= VIOBLK_QUEUE_MASK;
+ } while (cons != prod);
- idx = (idx + 1) & VIOBLK_QUEUE_MASK;
- }
out:
- free(vr);
- return (ret);
+ vq->last_avail = cons;
+ dev->cfg.isr_status = 1;
+ vcpu_assert_pic_irq(dev->vm_id, 0, dev->irq);
}
int
@@ -739,8 +729,8 @@ virtio_blk_io(int dir, uint16_t reg, uin
break;
case VIRTIO_CONFIG_QUEUE_NOTIFY:
dev->cfg.queue_notify = *data;
- if (vioblk_notifyq(dev))
- *intr = 1;
+ task_add(iotq, &dev->vbq[0].t);
+ *intr = 1;
break;
case VIRTIO_CONFIG_DEVICE_STATUS:
dev->cfg.device_status = *data;
@@ -754,7 +744,7 @@ virtio_blk_io(int dir, uint16_t reg, uin
dev->cfg.queue_select = 0;
dev->cfg.queue_notify = 0;
dev->cfg.isr_status = 0;
- dev->vq[0].last_avail = 0;
+ dev->vbq[0].vq.last_avail = 0;
vcpu_deassert_pic_irq(dev->vm_id, 0, dev->irq);
}
break;
@@ -1897,8 +1887,14 @@ virtio_init(struct vmd_vm *vm, int child
return;
}
+ iotq = taskq_create("vioblk");
+ if (iotq == NULL)
+ fatalx("unable to create vioblk taskq");
+
/* One virtio block device for each disk defined in vcp */
for (i = 0; i < vcp->vcp_ndisks; i++) {
+ struct virtio_vq_info *vq;
+
if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
PCI_PRODUCT_QUMRANET_VIO_BLOCK,
PCI_CLASS_MASS_STORAGE,
@@ -1915,13 +1911,16 @@ virtio_init(struct vmd_vm *vm, int child
"device", __progname);
return;
}
- vioblk[i].vq[0].qs = VIOBLK_QUEUE_SIZE;
- vioblk[i].vq[0].vq_availoffset =
+
+ vq = &vioblk[i].vbq[0].vq;
+ vq->qs = VIOBLK_QUEUE_SIZE;
+ vq->vq_availoffset =
sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
- vioblk[i].vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
+ vq->vq_usedoffset = VIRTQUEUE_ALIGN(
sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
+ sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
- vioblk[i].vq[0].last_avail = 0;
+ vq->last_avail = 0;
+
vioblk[i].cfg.device_feature = VIRTIO_BLK_F_SIZE_MAX;
vioblk[i].max_xfer = 1048576;
vioblk[i].pci_id = id;
@@ -1935,6 +1934,10 @@ virtio_init(struct vmd_vm *vm, int child
return;
}
vioblk[i].sz /= 512;
+
+ vioblk[i].vbq[0].dev = &vioblk[i];
+ task_set(&vioblk[i].vbq[0].t,
+ vioblk_notifyq, &vioblk[i].vbq[0]);
}
}
Index: virtio.h
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v
retrieving revision 1.42
diff -u -p -r1.42 virtio.h
--- virtio.h 4 May 2022 23:17:25 -0000 1.42
+++ virtio.h 11 Nov 2022 15:51:51 -0000
@@ -24,6 +24,7 @@
#include <event.h>
#include "vmd.h"
+#include "task.h"
#ifndef _VIRTIO_H_
#define _VIRTIO_H_
@@ -167,10 +168,22 @@ struct viornd_dev {
uint32_t vm_id;
};
+/*
+ * vioblk
+ */
+
+struct vioblk_queue { /* per-virtqueue state of a vioblk device */
+ struct vioblk_dev *dev; /* device this queue belongs to */
+ void *ring; /* ring pointer from vaddr_mem(); NULL-checked before use */
+ struct virtio_vq_info vq; /* queue address, size, offsets, last_avail */
+ struct task t; /* task that runs vioblk_notifyq() on this queue */
+ struct event ev; /* NOTE(review): no user visible in this diff */
+};
+
struct vioblk_dev {
struct virtio_io_cfg cfg;
- struct virtio_vq_info vq[VIRTIO_MAX_QUEUES];
+ struct vioblk_queue vbq[VIRTIO_MAX_QUEUES];
struct virtio_backing file;
uint64_t sz;
@@ -181,7 +194,8 @@ struct vioblk_dev {
uint32_t vm_id;
};
-/* vioscsi will use at least 3 queues - 5.6.2 Virtqueues
+/*
+ * vioscsi will use at least 3 queues - 5.6.2 Virtqueues
* Current implementation will use 3
* 0 - control
* 1 - event
@@ -301,7 +315,7 @@ int vioblk_restore(int, struct vmop_crea
int[][VM_MAX_BASE_PER_DISK]);
void vioblk_update_qs(struct vioblk_dev *);
void vioblk_update_qa(struct vioblk_dev *);
-int vioblk_notifyq(struct vioblk_dev *);
+void vioblk_notifyq(void *);
int virtio_net_io(int, uint16_t, uint32_t *, uint8_t *, void *,
uint8_t);
int vionet_dump(int);
Index: vm.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
retrieving revision 1.76
diff -u -p -r1.76 vm.c
--- vm.c 11 Nov 2022 10:52:44 -0000 1.76
+++ vm.c 11 Nov 2022 15:51:51 -0000
@@ -1990,6 +1990,31 @@ read_mem(paddr_t src, void *buf, size_t
return (0);
}
+void * /* map guest-physical [src, src + len) to a pointer, or NULL + errno */
+vaddr_mem(paddr_t src, size_t len)
+{
+ struct vm_mem_range *vmr;
+ size_t off;
+
+ vmr = find_gpa_range(&current_vm->vm_params.vmc_params, src, len);
+ if (vmr == NULL) { /* no memory range was found for the request */
+ errno = EINVAL;
+ log_warn("%s: failed - invalid memory range src = 0x%lx, "
+ "len = 0x%zx", __func__, src, len);
+ return (NULL);
+ }
+
+ off = src - vmr->vmr_gpa; /* byte offset of src inside the range */
+ if (len > (vmr->vmr_size - off)) { /* request runs past the range's end */
+ errno = ENOMEM;
+ log_warn("%s: failed - invalid memory range src = 0x%lx, "
+ "len = 0x%zx", __func__, src, len);
+ return (NULL);
+ }
+
+ return ((char *)vmr->vmr_va + off); /* same offset from the range's vmr_va */
+}
+
/*
* vcpu_assert_pic_irq
*
Index: vmd.h
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vmd.h,v
retrieving revision 1.111
diff -u -p -r1.111 vmd.h
--- vmd.h 31 Oct 2022 14:02:11 -0000 1.111
+++ vmd.h 11 Nov 2022 15:51:51 -0000
@@ -454,6 +454,7 @@ int vmm_pipe(struct vmd_vm *, int, void
void mutex_lock(pthread_mutex_t *);
void mutex_unlock(pthread_mutex_t *);
int read_mem(paddr_t, void *buf, size_t);
+void *vaddr_mem(paddr_t, size_t);
int start_vm(struct vmd_vm *, int);
__dead void vm_shutdown(unsigned int);
void vm_pipe_init(struct vm_dev_pipe *, void (*)(int, short, void
*));