On Fri, Aug 11, 2023 at 10:33 PM Ilya Maximets <[email protected]> wrote:
>
> Lots of virtio functions that are on a hot path in data transmission
> are initializing indirect descriptor cache at the point of stack
> allocation.  It's a 112 byte structure that is getting zeroed out on
> each call adding unnecessary overhead.  It's going to be correctly
> initialized later via special init function.  The only reason to
> actually initialize right away is the ability to safely destruct it.
> Replacing a designated initializer with a function to only initialize
> what is necessary.
>
> Removal of the unnecessary stack initializations improves throughput
> of virtio-net devices in terms of 64B packets per second by 6-14 %
> depending on the case.  Tested with a proposed af-xdp network backend
> and a dpdk testpmd application in the guest, but should be beneficial
> for other virtio devices as well.
>
> Signed-off-by: Ilya Maximets <[email protected]>

Acked-by: Jason Wang <[email protected]>

Thanks

> ---
>
> Version 2:
>
>   * Introduced an initialization function, so we don't need to compare
>     pointers in the end. [Stefan]
>   * Removed the now unused macro. [Jason]
>
>  hw/virtio/virtio.c    | 20 +++++++++++++++-----
>  include/exec/memory.h | 16 +++++++++++++---
>  2 files changed, 28 insertions(+), 8 deletions(-)
>
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 309038fd46..3d768fda5a 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -1071,10 +1071,12 @@ static void virtqueue_split_get_avail_bytes(VirtQueue 
> *vq,
>      VirtIODevice *vdev = vq->vdev;
>      unsigned int idx;
>      unsigned int total_bufs, in_total, out_total;
> -    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> +    MemoryRegionCache indirect_desc_cache;
>      int64_t len = 0;
>      int rc;
>
> +    address_space_cache_init_empty(&indirect_desc_cache);
> +
>      idx = vq->last_avail_idx;
>      total_bufs = in_total = out_total = 0;
>
> @@ -1207,12 +1209,14 @@ static void 
> virtqueue_packed_get_avail_bytes(VirtQueue *vq,
>      VirtIODevice *vdev = vq->vdev;
>      unsigned int idx;
>      unsigned int total_bufs, in_total, out_total;
> +    MemoryRegionCache indirect_desc_cache;
>      MemoryRegionCache *desc_cache;
> -    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
>      int64_t len = 0;
>      VRingPackedDesc desc;
>      bool wrap_counter;
>
> +    address_space_cache_init_empty(&indirect_desc_cache);
> +
>      idx = vq->last_avail_idx;
>      wrap_counter = vq->last_avail_wrap_counter;
>      total_bufs = in_total = out_total = 0;
> @@ -1487,7 +1491,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t 
> sz)
>  {
>      unsigned int i, head, max;
>      VRingMemoryRegionCaches *caches;
> -    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> +    MemoryRegionCache indirect_desc_cache;
>      MemoryRegionCache *desc_cache;
>      int64_t len;
>      VirtIODevice *vdev = vq->vdev;
> @@ -1498,6 +1502,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t 
> sz)
>      VRingDesc desc;
>      int rc;
>
> +    address_space_cache_init_empty(&indirect_desc_cache);
> +
>      RCU_READ_LOCK_GUARD();
>      if (virtio_queue_empty_rcu(vq)) {
>          goto done;
> @@ -1624,7 +1630,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t 
> sz)
>  {
>      unsigned int i, max;
>      VRingMemoryRegionCaches *caches;
> -    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> +    MemoryRegionCache indirect_desc_cache;
>      MemoryRegionCache *desc_cache;
>      int64_t len;
>      VirtIODevice *vdev = vq->vdev;
> @@ -1636,6 +1642,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t 
> sz)
>      uint16_t id;
>      int rc;
>
> +    address_space_cache_init_empty(&indirect_desc_cache);
> +
>      RCU_READ_LOCK_GUARD();
>      if (virtio_queue_packed_empty_rcu(vq)) {
>          goto done;
> @@ -3935,13 +3943,15 @@ VirtioQueueElement 
> *qmp_x_query_virtio_queue_element(const char *path,
>      } else {
>          unsigned int head, i, max;
>          VRingMemoryRegionCaches *caches;
> -        MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> +        MemoryRegionCache indirect_desc_cache;
>          MemoryRegionCache *desc_cache;
>          VRingDesc desc;
>          VirtioRingDescList *list = NULL;
>          VirtioRingDescList *node;
>          int rc; int ndescs;
>
> +        address_space_cache_init_empty(&indirect_desc_cache);
> +
>          RCU_READ_LOCK_GUARD();
>
>          max = vq->vring.num;
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 68284428f8..b1c4329d11 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -2664,9 +2664,6 @@ struct MemoryRegionCache {
>      bool is_write;
>  };
>
> -#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mrs.mr = NULL })
> -
> -
>  /* address_space_ld*_cached: load from a cached #MemoryRegion
>   * address_space_st*_cached: store into a cached #MemoryRegion
>   *
> @@ -2755,6 +2752,19 @@ int64_t address_space_cache_init(MemoryRegionCache 
> *cache,
>                                   hwaddr len,
>                                   bool is_write);
>
> +/**
> + * address_space_cache_init_empty: Initialize empty #MemoryRegionCache
> + *
> + * @cache: The #MemoryRegionCache to operate on.
> + *
> + * Initializes #MemoryRegionCache structure without memory region attached.
> + * Cache initialized this way can only be safely destroyed, but not used.
> + */
> +static inline void address_space_cache_init_empty(MemoryRegionCache *cache)
> +{
> +    cache->mrs.mr = NULL;
> +}
> +
>  /**
>   * address_space_cache_invalidate: complete a write to a #MemoryRegionCache
>   *
> --
> 2.40.1
>


Reply via email to