Hi Stephen, Maxime, Any feedback on the latest patch set v12? Please help merge this into mainline so we can catch the upcoming SPDK release.
Thanks, Pravin Internal Use - Confidential > -----Original Message----- > From: Bathija, Pravin > Sent: Tuesday, May 5, 2026 9:00 PM > To: 'Maxime Coquelin' <[email protected]> > Cc: [email protected]; [email protected]; [email protected]; > [email protected] > Subject: RE: [PATCH v11 3/5] vhost_user: support function defines for back-end > > Hi Maxime, > > The response are inline. I have also submitted patch-set v12 with the changes. > > From: Maxime Coquelin <[email protected]> > Sent: Tuesday, May 5, 2026 2:48 AM > To: Bathija, Pravin <[email protected]> > Cc: [email protected]; [email protected]; [email protected]; > [email protected] > Subject: Re: [PATCH v11 3/5] vhost_user: support function defines for back-end > > [EXTERNAL EMAIL] > > > On Tue, May 5, 2026 at 7:53 AM <mailto:[email protected]> wrote: > From: Pravin M Bathija <mailto:[email protected]> > > Here we define support functions which are called from the various vhost-user > back-end message functions like set memory table, get memory slots, add > memory region, remove memory region. These are essentially common > functions to initialize memory, unmap a set of memory regions, perform > register copy, align memory addresses, dma map/unmap a single memory > region and remove guest pages by removing all entries belonging to a given > memory region. > > Signed-off-by: Pravin M Bathija <mailto:[email protected]> > --- > lib/vhost/vhost_user.c | 146 ++++++++++++++++++++++++++++++++++++++-- > - > 1 file changed, 136 insertions(+), 10 deletions(-) > > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index > 4bfb13fb98..1f96ecf963 100644 > --- a/lib/vhost/vhost_user.c > +++ b/lib/vhost/vhost_user.c > @@ -171,6 +171,52 @@ get_blk_size(int fd) > return ret == -1 ? 
(uint64_t)-1 : (uint64_t)stat.st_blksize; > } > > +static int > +async_dma_map_region(struct virtio_net *dev, struct > +rte_vhost_mem_region *reg, bool do_map) { > + uint32_t i; > + int ret; > + uint64_t reg_start = reg->host_user_addr; > + uint64_t reg_end = reg_start + reg->size; > + > + for (i = 0; i < dev->nr_guest_pages; i++) { > + struct guest_page *page = &dev->guest_pages[i]; > + > + /* Only process pages belonging to this region */ > + if (page->host_user_addr < reg_start || > + page->host_user_addr >= reg_end) > + continue; > + > + if (do_map) { > + ret = > +rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD, > + page->host_user_addr, > + page->host_iova, > + page->size); > + if (ret) { > + if (rte_errno == ENODEV) > + return 0; > + > + VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA > +engine map failed"); > + return -1; > + } > + } else { > + ret = > +rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD, > + page->host_user_addr, > + page->host_iova, > + page->size); > + if (ret) { > + if (rte_errno == EINVAL) > + return 0; > + > + VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA > +engine unmap failed"); > + return -1; > + } > + } > + } > + > + return 0; > +} > + > static void > async_dma_map(struct virtio_net *dev, bool do_map) > { > @@ -225,7 +271,17 @@ async_dma_map(struct virtio_net *dev, bool > do_map) > } > > I think async_dma_map and async_dma_map_region should be refactored to > avoid code duplication, What about something like this: > > static void > async_dma_map(struct virtio_net *dev, bool do_map) > { > uint32_t i; > struct rte_vhost_mem_region *reg; > > for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) { > reg = &dev->mem->regions[i]; > if (reg->host_user_addr == 0) > continue; > async_dma_map_region(dev, reg, do_map); > } > } > > Also, duplicating code and stripping comments is not ideal as they are > important (i.e. 
to understand why we can ignore ENODEV and EINVAL) > > DMA refactoring: async_dma_map() now delegates to > async_dma_map_region(), eliminating the duplicated DMA map/unmap logic. > The original comments explaining ENODEV/EINVAL handling have been > restored in async_dma_map_region(). > > > static void > -free_mem_region(struct virtio_net *dev) > +free_mem_region(struct rte_vhost_mem_region *reg) { > + if (reg != NULL && reg->mmap_addr) { > + munmap(reg->mmap_addr, reg->mmap_size); > + close(reg->fd); > + memset(reg, 0, sizeof(struct rte_vhost_mem_region)); > + } > +} > + > +static void > +free_all_mem_regions(struct virtio_net *dev) > { > uint32_t i; > struct rte_vhost_mem_region *reg; @@ -236,12 +292,10 @@ > free_mem_region(struct virtio_net *dev) > if (dev->async_copy && rte_vfio_is_enabled("vfio")) > async_dma_map(dev, false); > > - for (i = 0; i < dev->mem->nregions; i++) { > + for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) { > reg = &dev->mem->regions[i]; > - if (reg->host_user_addr) { > - munmap(reg->mmap_addr, reg->mmap_size); > - close(reg->fd); > - } > + if (reg->mmap_addr) > + free_mem_region(reg); > } > } > > @@ -255,7 +309,7 @@ vhost_backend_cleanup(struct virtio_net *dev) > vdpa_dev->ops->dev_cleanup(dev->vid); > > if (dev->mem) { > - free_mem_region(dev); > + free_all_mem_regions(dev); > rte_free(dev->mem); > dev->mem = NULL; > } > @@ -704,7 +758,7 @@ numa_realloc(struct virtio_net **pdev, struct > vhost_virtqueue **pvq) > vhost_devices[dev->vid] = dev; > > mem_size = sizeof(struct rte_vhost_memory) + > - sizeof(struct rte_vhost_mem_region) * dev->mem->nregions; > + sizeof(struct rte_vhost_mem_region) * > +VHOST_MEMORY_MAX_NREGIONS; > mem = rte_realloc_socket(dev->mem, mem_size, 0, node); > if (!mem) { > VHOST_CONFIG_LOG(dev->ifname, ERR, @@ -808,8 +862,10 @@ > hua_to_alignment(struct rte_vhost_memory *mem, void *ptr) > uint32_t i; > uintptr_t hua = (uintptr_t)ptr; > > - for (i = 0; i < mem->nregions; i++) { > + for (i = 0; i < 
VHOST_MEMORY_MAX_NREGIONS; i++) { > r = &mem->regions[i]; > + if (r->host_user_addr == 0) > + continue; > if (hua >= r->host_user_addr && > hua < r->host_user_addr + r->size) { > return get_blk_size(r->fd); @@ -1136,6 +1192,24 @@ > add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, > return 0; > } > > +static void > +remove_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region > +*reg) { > + uint64_t reg_start = reg->host_user_addr; > + uint64_t reg_end = reg_start + reg->size; > + uint32_t i, j = 0; > + > + for (i = 0; i < dev->nr_guest_pages; i++) { > + if (dev->guest_pages[i].host_user_addr >= reg_start && > + dev->guest_pages[i].host_user_addr < reg_end) > + continue; > + if (j != i) > + dev->guest_pages[j] = dev->guest_pages[i]; > + j++; > + } > + dev->nr_guest_pages = j; > +} > + > #ifdef RTE_LIBRTE_VHOST_DEBUG > /* TODO: enable it only in debug mode? */ > static void > @@ -1246,10 +1320,14 @@ vhost_user_postcopy_register(struct virtio_net > *dev, int main_fd, > * DPDK's virtual address with Qemu, so that Qemu can > * retrieve the region offset when handling userfaults. 
> */ > + int reg_msg_index = 0; > memory = &ctx->msg.payload.memory; > for (i = 0; i < memory->nregions; i++) { > reg = &dev->mem->regions[i]; > - memory->regions[i].userspace_addr = reg->host_user_addr; > + if (reg->host_user_addr == 0) > + continue; > + memory->regions[reg_msg_index].userspace_addr = > +reg->host_user_addr; > + reg_msg_index++; > } > > /* Send the addresses back to qemu */ @@ -1278,6 +1356,8 @@ > vhost_user_postcopy_register(struct virtio_net *dev, int main_fd, > /* Now userfault register and we can use the memory */ > for (i = 0; i < memory->nregions; i++) { > reg = &dev->mem->regions[i]; > + if (reg->host_user_addr == 0) > + continue; > if (vhost_user_postcopy_region_register(dev, reg) < 0) > return -1; > } > @@ -1382,6 +1462,52 @@ vhost_user_mmap_region(struct virtio_net *dev, > return 0; > } > > +static int > +vhost_user_initialize_memory(struct virtio_net **pdev) { > + struct virtio_net *dev = *pdev; > + int numa_node = SOCKET_ID_ANY; > + > + if (dev->mem != NULL) { > + VHOST_CONFIG_LOG(dev->ifname, ERR, > + "memory already initialized, free it first"); > + return -1; > + } > + > + /* > + * If VQ 0 has already been allocated, try to allocate on the > +same > + * NUMA node. It can be reallocated later in numa_realloc(). 
> + */ > + if (dev->nr_vring > 0) > + numa_node = dev->virtqueue[0]->numa_node; > + > + dev->nr_guest_pages = 0; > + if (dev->guest_pages == NULL) { > + dev->max_guest_pages = 8; > + dev->guest_pages = rte_zmalloc_socket(NULL, > + dev->max_guest_pages * > + sizeof(struct guest_page), > + RTE_CACHE_LINE_SIZE, > + numa_node); > + if (dev->guest_pages == NULL) { > + VHOST_CONFIG_LOG(dev->ifname, ERR, > + "failed to allocate memory for > +dev->guest_pages"); > + return -1; > + } > + } > + > + dev->mem = rte_zmalloc_socket("vhost-mem-table", sizeof(struct > +rte_vhost_memory) + > + sizeof(struct rte_vhost_mem_region) * > +VHOST_MEMORY_MAX_NREGIONS, 0, numa_node); > + if (dev->mem == NULL) { > + VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to allocate > +memory for dev->mem"); > + rte_free(dev->guest_pages); > + dev->guest_pages = NULL; > + return -1; > + } > + > + return 0; > +} > + > > I think it should be in a dedicated patch, and in the same patch > would vhost_user_set_mem_table() make use of it. > The idea is to make it straightforward you are doing a refactoring, and easily > check the code you are extracting out from > vhost_user_set_mem_table() into a new function has not been changed in- > between. > > vhost_user_initialize_memory() patch placement: Moved from patch 3 to patch > 4, grouped with the > vhost_user_set_mem_table() refactoring that uses it. This makes the > extraction clearer to review as a pure refactor > without mixing it with other changes. > > static int > vhost_user_set_mem_table(struct virtio_net **pdev, > struct vhu_msg_context *ctx, > -- > 2.43.0

