On Thu, Nov 13, 2025 at 7:56 PM Eugenio Pérez <[email protected]> wrote:
>
> Add support for assigning Address Space Identifiers (ASIDs) to each VQ
> group. This enables mapping each group into a distinct memory space.
>
> Now that the driver can change ASID in the middle of operation, the
> domain that each vq address point is also protected by domain_lock.
Maybe it's better to document what is protected by RCU and how (an
example of the kind of comment I mean is below, after the changelog).
More below.

>
> Acked-by: Jason Wang <[email protected]>
> Signed-off-by: Eugenio Pérez <[email protected]>
> ---
> v9:
> * Replace mutex with RCU, as the vdpa map_ops can run from atomic
> context.
>
> v8:
> * Revert the mutex to rwlock change, it needs proper profiling to
> justify it.
>
> v7:
> * Take write lock in the error path (Jason).
>
> v6:
> * Make vdpa_dev_add use gotos for error handling (MST).
> * s/(dev->api_version < 1) ?/(dev->api_version < VDUSE_API_VERSION_1) ?/
> (MST).
> * Fix struct name not matching in the doc.
>
> v5:
> * Properly return errno if copy_to_user returns >0 in VDUSE_IOTLB_GET_FD
> ioctl (Jason).
> * Properly set domain bounce size to divide equally between nas (Jason).
> * Exclude "padding" member from the only >V1 members in
> vduse_dev_request.
>
> v4:
> * Divide each domain bounce size between the device bounce size (Jason).
> * revert unneeded addr = NULL assignment (Jason)
> * Change if (x && (y || z)) return to if (x) { if (y) return; if (z)
> return; } (Jason)
> * Change a bad multiline comment, using @ caracter instead of * (Jason).
> * Consider config->nas == 0 as a fail (Jason).
>
> v3:
> * Get the vduse domain through the vduse_as in the map functions
> (Jason).
> * Squash with the patch creating the vduse_as struct (Jason).
> * Create VDUSE_DEV_MAX_AS instead of comparing agains a magic number
> (Jason)
>
> v2:
> * Convert the use of mutex to rwlock.
>
> RFC v3:
> * Increase VDUSE_MAX_VQ_GROUPS to 0xffff (Jason). It was set to a lower
> value to reduce memory consumption, but vqs are already limited to
> that value and userspace VDUSE is able to allocate that many vqs.
> * Remove TODO about merging VDUSE_IOTLB_GET_FD ioctl with
> VDUSE_IOTLB_GET_INFO.
> * Use of array_index_nospec in VDUSE device ioctls.
> * Embed vduse_iotlb_entry into vduse_iotlb_entry_v2.
> * Move the umem mutex to asid struct so there is no contention between
> ASIDs.
>
> RFC v2:
> * Make iotlb entry the last one of vduse_iotlb_entry_v2 so the first
> part of the struct is the same.
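
To be concrete, something along these lines on the new field would
already help (untested, wording is only a suggestion; it assumes the
final scheme stays "writers publish with rcu_assign_pointer(), the map
ops read under rcu_read_lock()"):

struct vduse_vq_group {
        /*
         * Address space this group is attached to. Writers publish a
         * new pointer with rcu_assign_pointer() (set_group_asid and
         * reset); the map ops dereference it under rcu_read_lock(),
         * so the vduse_as (and its domain) must stay valid until
         * those readers are done.
         */
        struct vduse_as *as __rcu;
        struct vduse_dev *dev;
};
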
> ---
>  drivers/vdpa/vdpa_user/vduse_dev.c | 370 ++++++++++++++++++++---------
>  include/uapi/linux/vduse.h         |  53 ++++-
>  2 files changed, 314 insertions(+), 109 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c
> b/drivers/vdpa/vdpa_user/vduse_dev.c
> index 97be04f73fbf..ff95ed56f22d 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -11,6 +11,7 @@
>  #include "linux/virtio_net.h"
>  #include <linux/init.h>
>  #include <linux/module.h>
> +#include <linux/rcupdate.h>
>  #include <linux/cdev.h>
>  #include <linux/device.h>
>  #include <linux/eventfd.h>
> @@ -41,6 +42,7 @@
>
>  #define VDUSE_DEV_MAX (1U << MINORBITS)
>  #define VDUSE_DEV_MAX_GROUPS 0xffff
> +#define VDUSE_DEV_MAX_AS 0xffff
>  #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
>  #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
>  #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
> @@ -86,7 +88,14 @@ struct vduse_umem {
>          struct mm_struct *mm;
>  };
>
> +struct vduse_as {
> +        struct vduse_iova_domain *domain;
> +        struct vduse_umem *umem;
> +        struct mutex mem_lock;
> +};
> +
>  struct vduse_vq_group {
> +        struct vduse_as *as __rcu;
>          struct vduse_dev *dev;
>  };
>
> @@ -94,7 +103,7 @@ struct vduse_dev {
>          struct vduse_vdpa *vdev;
>          struct device *dev;
>          struct vduse_virtqueue **vqs;
> -        struct vduse_iova_domain *domain;
> +        struct vduse_as *as;
>          char *name;
>          struct mutex lock;
>          spinlock_t msg_lock;
> @@ -122,9 +131,8 @@ struct vduse_dev {
>          u32 vq_num;
>          u32 vq_align;
>          u32 ngroups;
> -        struct vduse_umem *umem;
> +        u32 nas;
>          struct vduse_vq_group *groups;
> -        struct mutex mem_lock;
>          unsigned int bounce_size;
>          struct mutex domain_lock;
>  };
> @@ -314,7 +322,7 @@ static int vduse_dev_set_status(struct vduse_dev *dev, u8
> status)
>          return vduse_dev_msg_sync(dev, &msg);
>  }
>
> -static int vduse_dev_update_iotlb(struct vduse_dev *dev,
> +static int vduse_dev_update_iotlb(struct vduse_dev *dev, u32 asid,
>                                    u64 start, u64 last)
>  {
>          struct vduse_dev_msg msg = { 0 };
> @@ -323,8 +331,14 @@ static int vduse_dev_update_iotlb(struct vduse_dev *dev,
>                  return -EINVAL;
>
>          msg.req.type = VDUSE_UPDATE_IOTLB;
> -        msg.req.iova.start = start;
> -        msg.req.iova.last = last;
> +        if (dev->api_version < VDUSE_API_VERSION_1) {
> +                msg.req.iova.start = start;
> +                msg.req.iova.last = last;
> +        } else {
> +                msg.req.iova_v2.start = start;
> +                msg.req.iova_v2.last = last;
> +                msg.req.iova_v2.asid = asid;
> +        }
>
>          return vduse_dev_msg_sync(dev, &msg);
>  }
> @@ -436,14 +450,32 @@ static __poll_t vduse_dev_poll(struct file *file,
> poll_table *wait)
>          return mask;
>  }
>
> +/* Force set the asid to a vq group without a message to the VDUSE device */
> +static void vduse_set_group_asid_nomsg(struct vduse_dev *dev,
> +                                       unsigned int group, unsigned int asid)
> +{
> +        /*
> +         * Two concurrent updates to this pointer are valid as they cannot
> +         * point to an invalid region. It is ok for them to race as long as
> +         * the readers see a consistent state through RCU.
> +         */
> +        rcu_assign_pointer(dev->groups[group].as, &dev->as[asid]);

I'd expect at least a synchronize_rcu() here to wait until in-flight
readers are done?
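Something like the following is what I have in mind (untested sketch;
the synchronize_rcu() is my addition and assumes every caller of this
helper can sleep):

static void vduse_set_group_asid_nomsg(struct vduse_dev *dev,
                                       unsigned int group, unsigned int asid)
{
        /* Publish the new address space for this group */
        rcu_assign_pointer(dev->groups[group].as, &dev->as[asid]);
        /*
         * Wait for map ops that may still be using the old dev->as[...]
         * pointer under rcu_read_lock() before anyone tears the old
         * mapping down.
         */
        synchronize_rcu();
}
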
> +}
> +
>  static void vduse_dev_reset(struct vduse_dev *dev)
>  {
>          int i;
> -        struct vduse_iova_domain *domain = dev->domain;
>
>          /* The coherent mappings are handled in vduse_dev_free_coherent() */
> -        if (domain && domain->bounce_map)
> -                vduse_domain_reset_bounce_map(domain);
> +        for (i = 0; i < dev->nas; i++) {
> +                struct vduse_iova_domain *domain = dev->as[i].domain;
> +
> +                if (domain && domain->bounce_map)
> +                        vduse_domain_reset_bounce_map(domain);
> +        }
> +
> +        for (i = 0; i < dev->ngroups; i++)
> +                vduse_set_group_asid_nomsg(dev, i, 0);
>
>          down_write(&dev->rwsem);
>
> @@ -623,6 +655,29 @@ static union virtio_map vduse_get_vq_map(struct
> vdpa_device *vdpa, u16 idx)
>          return ret;
>  }
>
> +static int vduse_set_group_asid(struct vdpa_device *vdpa, unsigned int group,
> +                                unsigned int asid)
> +{
> +        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> +        struct vduse_dev_msg msg = { 0 };
> +        int r;
> +
> +        if (dev->api_version < VDUSE_API_VERSION_1 ||
> +            group >= dev->ngroups || asid >= dev->nas)
> +                return -EINVAL;
> +
> +        msg.req.type = VDUSE_SET_VQ_GROUP_ASID;
> +        msg.req.vq_group_asid.group = group;
> +        msg.req.vq_group_asid.asid = asid;
> +
> +        r = vduse_dev_msg_sync(dev, &msg);
> +        if (r < 0)
> +                return r;
> +
> +        vduse_set_group_asid_nomsg(dev, group, asid);
> +        return 0;
> +}
> +
>  static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
>                                     struct vdpa_vq_state *state)
>  {
> @@ -794,13 +849,13 @@ static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
>          struct vduse_dev *dev = vdpa_to_vduse(vdpa);
>          int ret;
>
> -        ret = vduse_domain_set_map(dev->domain, iotlb);
> +        ret = vduse_domain_set_map(dev->as[asid].domain, iotlb);
>          if (ret)
>                  return ret;
>
> -        ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
> +        ret = vduse_dev_update_iotlb(dev, asid, 0ULL, ULLONG_MAX);
>          if (ret) {
> -                vduse_domain_clear_map(dev->domain, iotlb);
> +                vduse_domain_clear_map(dev->as[asid].domain, iotlb);
>                  return ret;
>          }
>
> @@ -843,6 +898,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops
> = {
>          .get_vq_affinity = vduse_vdpa_get_vq_affinity,
>          .reset = vduse_vdpa_reset,
>          .set_map = vduse_vdpa_set_map,
> +        .set_group_asid = vduse_set_group_asid,
>          .get_vq_map = vduse_get_vq_map,
>          .free = vduse_vdpa_free,
>  };
> @@ -852,14 +908,17 @@ static void vduse_dev_sync_single_for_device(union
> virtio_map token,
>                                               enum dma_data_direction dir)
>  {
>          struct vduse_dev *vdev;
> +        struct vduse_as *as;
>          struct vduse_iova_domain *domain;
>
>          if (!token.group)
>                  return;
>
>          vdev = token.group->dev;
> -        domain = vdev->domain;
> -
> +        rcu_read_lock();
> +        as = rcu_dereference(token.group->as);
> +        domain = as->domain;
> +        rcu_read_unlock();
>          vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);

This is suspicious: at the very least we should call rcu_read_unlock()
after vduse_domain_sync_single_for_device(), otherwise I don't see how
RCU protects the domain here.
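I.e. something like this (untested), here and in the other map ops
below (sync_single_for_cpu, map_page, ...), so the domain cannot go
away while it is still being used:

        rcu_read_lock();
        as = rcu_dereference(token.group->as);
        domain = as->domain;
        vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
        /* Only drop the read side once we are done with the domain */
        rcu_read_unlock();
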
>  }
>
> @@ -868,14 +927,17 @@ static void vduse_dev_sync_single_for_cpu(union
> virtio_map token,
>                                            enum dma_data_direction dir)
>  {
>          struct vduse_dev *vdev;
> +        struct vduse_as *as;
>          struct vduse_iova_domain *domain;
>
>          if (!token.group)
>                  return;
>
>          vdev = token.group->dev;
> -        domain = vdev->domain;
> -
> +        rcu_read_lock();
> +        as = rcu_dereference(token.group->as);
> +        domain = as->domain;
> +        rcu_read_unlock();
>          vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
>  }
>
> @@ -885,15 +947,21 @@ static dma_addr_t vduse_dev_map_page(union virtio_map
> token, struct page *page,
>                                       unsigned long attrs)
>  {
>          struct vduse_dev *vdev;
> +        struct vduse_as *as;
>          struct vduse_iova_domain *domain;
> +        dma_addr_t r;
>
>          if (!token.group)
>                  return DMA_MAPPING_ERROR;
>
>          vdev = token.group->dev;
> -        domain = vdev->domain;
> +        rcu_read_lock();
> +        as = rcu_dereference(token.group->as);
> +        domain = as->domain;
> +        rcu_read_unlock();
> +        r = vduse_domain_map_page(domain, page, offset, size, dir, attrs);

Same here.

Thanks

