Hi,
On Tuesday, June 18, 2024 11:48:34 PM GMT+5:30 Sahil Siddiq wrote:
> [...]
>
> hw/virtio/vhost-shadow-virtqueue.c | 124 ++++++++++++++++++++++++++++-
> hw/virtio/vhost-shadow-virtqueue.h | 66 ++++++++++-----
> 2 files changed, 167 insertions(+), 23 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c
> b/hw/virtio/vhost-shadow-virtqueue.c
> index fc5f408f77..e3b276a9e9 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -217,6 +217,122 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue
> *svq,
> return true;
> }
>
> +/**
> + * Write descriptors to SVQ packed vring
> + *
> + * @svq: The shadow virtqueue
> + * @sg: Cache for hwaddr
> + * @out_sg: The iovec from the guest that is read-only for device
> + * @out_num: iovec length
> + * @in_sg: The iovec from the guest that is write-only for device
> + * @in_num: iovec length
> + * @head_flags: flags for first descriptor in list
> + *
> + * Return true if success, false otherwise and print error.
> + */
> +static bool vhost_svq_vring_write_descs_packed(VhostShadowVirtqueue *svq,
> hwaddr *sg,
> + const struct iovec *out_sg, size_t
> out_num,
> + const struct iovec *in_sg, size_t
> in_num,
> + uint16_t *head_flags)
> +{
> + uint16_t id, curr, head, i;
> + unsigned n;
> + struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
> + bool ok;
> +
> + head = svq->vring_packed.next_avail_idx;
> + i = head;
> + id = svq->free_head;
> + curr = id;
> +
> + size_t num = out_num + in_num;
> +
> + if (num == 0) {
> + return true;
> + }
> +
> + ok = vhost_svq_translate_addr(svq, sg, out_sg, out_num);
> + if (unlikely(!ok)) {
> + return false;
> + }
> +
> + ok = vhost_svq_translate_addr(svq, sg + out_num, in_sg, in_num);
> + if (unlikely(!ok)) {
> + return false;
> + }
> +
> + for (n = 0; n < num; n++) {
> + uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
> + (n < out_num ? 0 : VRING_DESC_F_WRITE) |
> + (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
> + if (i == head) {
> + *head_flags = flags;
> + } else {
> + descs[i].flags = flags;
> + }
> +
> + descs[i].addr = cpu_to_le64(sg[n]);
> + descs[i].id = id;
> + if (n < out_num) {
> + descs[i].len = cpu_to_le32(out_sg[n].iov_len);
> + } else {
> + descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
> + }
> +
> + curr = cpu_to_le16(svq->desc_next[curr]);
"curr" is being updated here, but descs[i].id is always set to id which doesn't
change in
the loop. So all the descriptors in the chain will have the same id. I can't
find anything
in the virtio specification [1] that suggests that all descriptors in the chain
have the same
id. Also, going by the figure captioned "Three chained descriptors available"
in the blog
post on packed virtqueues [2], it looks like the descriptors in the chain have
different
buffer ids.
The virtio implementation in Linux also reuses the same id value for all the
descriptors in a
single chain. I am not sure if I am missing something here.
> + if (++i >= svq->vring_packed.vring.num) {
> + i = 0;
> + svq->vring_packed.avail_used_flags ^=
> + 1 << VRING_PACKED_DESC_F_AVAIL |
> + 1 << VRING_PACKED_DESC_F_USED;
> + }
> + }
> +
> + if (i <= head) {
> + svq->vring_packed.avail_wrap_counter ^= 1;
> + }
> +
> + svq->vring_packed.next_avail_idx = i;
> + svq->free_head = curr;
Even though the same id is used, curr will not be id+1 here.
> + return true;
> +}
> +
> +static bool vhost_svq_add_packed(VhostShadowVirtqueue *svq,
> + const struct iovec *out_sg, size_t out_num,
> + const struct iovec *in_sg, size_t in_num,
> + unsigned *head)
> +{
> + bool ok;
> + uint16_t head_flags = 0;
> + g_autofree hwaddr *sgs = g_new(hwaddr, out_num + in_num);
I chose to use out_num+in_num as the size instead of MAX(out_num, in_num). I
found it easier to implement "vhost_svq_vring_write_descs_packed()" this way.
Please let me know if this isn't feasible or ideal.
> + *head = svq->vring_packed.next_avail_idx;
> +
> + /* We need some descriptors here */
> + if (unlikely(!out_num && !in_num)) {
> + qemu_log_mask(LOG_GUEST_ERROR,
> + "Guest provided element with no descriptors");
> + return false;
> + }
> +
> + ok = vhost_svq_vring_write_descs_packed(svq, sgs, out_sg, out_num,
> + in_sg, in_num, &head_flags);
> + if (unlikely(!ok)) {
> + return false;
> + }
> +
> + /*
> + * A driver MUST NOT make the first descriptor in the list
> + * available before all subsequent descriptors comprising
> + * the list are made available.
> + */
> + smp_wmb();
> + svq->vring_packed.vring.desc[*head].flags = head_flags;
> +
> + return true;
> +}
> +
> static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> {
> bool needs_kick;
> @@ -258,7 +374,13 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const
> struct iovec *out_sg,
> return -ENOSPC;
> }
>
> - ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num,
> &qemu_head);
> + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) {
> + ok = vhost_svq_add_packed(svq, out_sg, out_num,
> + in_sg, in_num, &qemu_head);
> + } else {
> + ok = vhost_svq_add_split(svq, out_sg, out_num,
> + in_sg, in_num, &qemu_head);
> + }
> if (unlikely(!ok)) {
> return -EINVAL;
> }
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h
> b/hw/virtio/vhost-shadow-virtqueue.h
> index 19c842a15b..ee1a87f523 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -46,10 +46,53 @@ typedef struct VhostShadowVirtqueueOps {
> VirtQueueAvailCallback avail_handler;
> } VhostShadowVirtqueueOps;
>
> +struct vring_packed {
> + /* Actual memory layout for this queue. */
> + struct {
> + unsigned int num;
> + struct vring_packed_desc *desc;
> + struct vring_packed_desc_event *driver;
> + struct vring_packed_desc_event *device;
> + } vring;
> +
> + /* Avail used flags. */
> + uint16_t avail_used_flags;
> +
> + /* Index of the next avail descriptor. */
> + uint16_t next_avail_idx;
> +
> + /* Driver ring wrap counter */
> + bool avail_wrap_counter;
> +};
> +
> /* Shadow virtqueue to relay notifications */
> typedef struct VhostShadowVirtqueue {
> + /* Virtio queue shadowing */
> + VirtQueue *vq;
> +
> + /* Virtio device */
> + VirtIODevice *vdev;
> +
> + /* SVQ vring descriptors state */
> + SVQDescState *desc_state;
> +
> + /*
> + * Backup next field for each descriptor so we can recover securely, not
> + * needing to trust the device access.
> + */
> + uint16_t *desc_next;
> +
> + /* Next free descriptor */
> + uint16_t free_head;
> +
> + /* Size of SVQ vring free descriptors */
> + uint16_t num_free;
> +
> /* Shadow vring */
> - struct vring vring;
> + union {
> + struct vring vring;
> + struct vring_packed vring_packed;
> + };
>
> /* Shadow kick notifier, sent to vhost */
> EventNotifier hdev_kick;
> @@ -69,27 +112,12 @@ typedef struct VhostShadowVirtqueue {
> /* Guest's call notifier, where the SVQ calls guest. */
> EventNotifier svq_call;
>
> - /* Virtio queue shadowing */
> - VirtQueue *vq;
> -
> - /* Virtio device */
> - VirtIODevice *vdev;
> -
> /* IOVA mapping */
> VhostIOVATree *iova_tree;
>
> - /* SVQ vring descriptors state */
> - SVQDescState *desc_state;
> -
> /* Next VirtQueue element that guest made available */
> VirtQueueElement *next_guest_avail_elem;
>
> - /*
> - * Backup next field for each descriptor so we can recover securely, not
> - * needing to trust the device access.
> - */
> - uint16_t *desc_next;
> -
> /* Caller callbacks */
> const VhostShadowVirtqueueOps *ops;
>
> @@ -99,17 +127,11 @@ typedef struct VhostShadowVirtqueue {
> /* Next head to expose to the device */
> uint16_t shadow_avail_idx;
>
> - /* Next free descriptor */
> - uint16_t free_head;
> -
> /* Last seen used idx */
> uint16_t shadow_used_idx;
>
> /* Next head to consume from the device */
> uint16_t last_used_idx;
> -
> - /* Size of SVQ vring free descriptors */
> - uint16_t num_free;
> } VhostShadowVirtqueue;
>
> bool vhost_svq_valid_features(uint64_t features, Error **errp);
> --
> 2.45.2
>
In "struct VhostShadowVirtqueue", I rearranged the order in which some members
appear.
I tried to keep the members common to split and packed virtqueues above the
union and
the rest below the union. I haven't entirely understood the role of some of the
members
(for example, VhostShadowVirtqueueOps *ops). I'll change this ordering if need
be as I
continue to understand them better.
For the next step, I think I should work on "vhost_svq_start()" which is where
members of
the struct are actually initialized. At the moment, only the split ring part of
the structure is
initialized.
I think I should also start working on enhancing "vhost_svq_kick()" to actually
send the buffers
to the device. I think it'll be easier to test these changes once that's done
(I am not sure about
this though). Would this involve implementing the notification mechanism and
event_idx?
Thanks,
Sahil
[1]
https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.html#x1-720008
[2] https://www.redhat.com/en/blog/packed-virtqueue-how-reduce-overhead-virtio