On Fri, Aug 11, 2023 at 10:33 PM Ilya Maximets <[email protected]> wrote: > > Lots of virtio functions that are on a hot path in data transmission > are initializing indirect descriptor cache at the point of stack > allocation. It's a 112 byte structure that is getting zeroed out on > each call adding unnecessary overhead. It's going to be correctly > initialized later via special init function. The only reason to > actually initialize right away is the ability to safely destruct it. > Replacing a designated initializer with a function to only initialize > what is necessary. > > Removal of the unnecessary stack initializations improves throughput > of virtio-net devices in terms of 64B packets per second by 6-14 % > depending on the case. Tested with a proposed af-xdp network backend > and a dpdk testpmd application in the guest, but should be beneficial > for other virtio devices as well. > > Signed-off-by: Ilya Maximets <[email protected]>
Acked-by: Jason Wang <[email protected]> Thanks > --- > > Version 2: > > * Introduced an initialization function, so we don't need to compare > pointers in the end. [Stefan] > * Removed the now unused macro. [Jason] > > hw/virtio/virtio.c | 20 +++++++++++++++----- > include/exec/memory.h | 16 +++++++++++++--- > 2 files changed, 28 insertions(+), 8 deletions(-) > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > index 309038fd46..3d768fda5a 100644 > --- a/hw/virtio/virtio.c > +++ b/hw/virtio/virtio.c > @@ -1071,10 +1071,12 @@ static void virtqueue_split_get_avail_bytes(VirtQueue > *vq, > VirtIODevice *vdev = vq->vdev; > unsigned int idx; > unsigned int total_bufs, in_total, out_total; > - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; > + MemoryRegionCache indirect_desc_cache; > int64_t len = 0; > int rc; > > + address_space_cache_init_empty(&indirect_desc_cache); > + > idx = vq->last_avail_idx; > total_bufs = in_total = out_total = 0; > > @@ -1207,12 +1209,14 @@ static void > virtqueue_packed_get_avail_bytes(VirtQueue *vq, > VirtIODevice *vdev = vq->vdev; > unsigned int idx; > unsigned int total_bufs, in_total, out_total; > + MemoryRegionCache indirect_desc_cache; > MemoryRegionCache *desc_cache; > - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; > int64_t len = 0; > VRingPackedDesc desc; > bool wrap_counter; > > + address_space_cache_init_empty(&indirect_desc_cache); > + > idx = vq->last_avail_idx; > wrap_counter = vq->last_avail_wrap_counter; > total_bufs = in_total = out_total = 0; > @@ -1487,7 +1491,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t > sz) > { > unsigned int i, head, max; > VRingMemoryRegionCaches *caches; > - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; > + MemoryRegionCache indirect_desc_cache; > MemoryRegionCache *desc_cache; > int64_t len; > VirtIODevice *vdev = vq->vdev; > @@ -1498,6 +1502,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t > sz) > VRingDesc desc; > int rc; > > + address_space_cache_init_empty(&indirect_desc_cache); > + > RCU_READ_LOCK_GUARD(); > if (virtio_queue_empty_rcu(vq)) { > goto done; > @@ -1624,7 +1630,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t > sz) > { > unsigned int i, max; > VRingMemoryRegionCaches *caches; > - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; > + MemoryRegionCache indirect_desc_cache; > MemoryRegionCache *desc_cache; > int64_t len; > VirtIODevice *vdev = vq->vdev; > @@ -1636,6 +1642,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t > sz) > uint16_t id; > int rc; > > + address_space_cache_init_empty(&indirect_desc_cache); > + > RCU_READ_LOCK_GUARD(); > if (virtio_queue_packed_empty_rcu(vq)) { > goto done; > @@ -3935,13 +3943,15 @@ VirtioQueueElement > *qmp_x_query_virtio_queue_element(const char *path, > } else { > unsigned int head, i, max; > VRingMemoryRegionCaches *caches; > - MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; > + MemoryRegionCache indirect_desc_cache; > MemoryRegionCache *desc_cache; > VRingDesc desc; > VirtioRingDescList *list = NULL; > VirtioRingDescList *node; > int rc; int ndescs; > > + address_space_cache_init_empty(&indirect_desc_cache); > + > RCU_READ_LOCK_GUARD(); > > max = vq->vring.num; > diff --git a/include/exec/memory.h b/include/exec/memory.h > index 68284428f8..b1c4329d11 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -2664,9 +2664,6 @@ struct MemoryRegionCache { > bool is_write; > }; > > -#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mrs.mr = NULL }) > - > - > /* address_space_ld*_cached: load from a cached #MemoryRegion > * address_space_st*_cached: store into a cached #MemoryRegion > * > @@ -2755,6 +2752,19 @@ int64_t address_space_cache_init(MemoryRegionCache > *cache, > hwaddr len, > bool is_write); > > +/** > + * address_space_cache_init_empty: Initialize empty #MemoryRegionCache > + * > + * @cache: The #MemoryRegionCache to operate on. > + * > + * Initializes #MemoryRegionCache structure without memory region attached. > + * Cache initialized this way can only be safely destroyed, but not used. > + */ > +static inline void address_space_cache_init_empty(MemoryRegionCache *cache) > +{ > + cache->mrs.mr = NULL; > +} > + > /** > * address_space_cache_invalidate: complete a write to a #MemoryRegionCache > * > -- > 2.40.1 >
