On 03/12/2025 09:01, Boris Brezillon wrote: > From: Faith Ekstrand <[email protected]> > > This will be used by the UMD to synchronize CPU-cached mappings when > the UMD can't do it directly (no usermode cache maintenance instruction > on Arm32). > > v2: > - Add more to the commit message > - Change the flags to better match the drm_gem_shmem_sync semantics > > v3: > - Add Steve's R-b > > v4: > - No changes > > v5: > - Drop Steve's R-b (semantics changes requiring a new review) > > v6: > - Bail out early in panfrost_ioctl_sync_bo() if op_count is zero > > v7: > - Hand-roll our own bo_sync() helper > > Signed-off-by: Faith Ekstrand <[email protected]> > Signed-off-by: Boris Brezillon <[email protected]>
Reviewed-by: Steven Price <[email protected]> > --- > drivers/gpu/drm/panfrost/panfrost_drv.c | 51 +++++++++++++++ > drivers/gpu/drm/panfrost/panfrost_gem.c | 84 +++++++++++++++++++++++++ > drivers/gpu/drm/panfrost/panfrost_gem.h | 2 + > include/uapi/drm/panfrost_drm.h | 45 +++++++++++++ > 4 files changed, 182 insertions(+) > > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c > b/drivers/gpu/drm/panfrost/panfrost_drv.c > index d650cd138dad..77b0ae5ef000 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c > @@ -580,6 +580,56 @@ static int panfrost_ioctl_jm_ctx_destroy(struct > drm_device *dev, void *data, > return panfrost_jm_ctx_destroy(file, args->handle); > } > > +static int panfrost_ioctl_sync_bo(struct drm_device *ddev, void *data, > + struct drm_file *file) > +{ > + struct drm_panfrost_sync_bo *args = data; > + struct drm_panfrost_bo_sync_op *ops; > + struct drm_gem_object *obj; > + int ret; > + u32 i; > + > + if (args->pad) > + return -EINVAL; > + > + if (!args->op_count) > + return 0; > + > + ops = kvmalloc_array(args->op_count, sizeof(*ops), GFP_KERNEL); > + if (!ops) { > + DRM_DEBUG("Failed to allocate incoming BO sync ops array\n"); > + return -ENOMEM; > + } > + > + if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops, > + args->op_count * sizeof(*ops))) { > + DRM_DEBUG("Failed to copy in BO sync ops\n"); > + ret = -EFAULT; > + goto err_ops; > + } > + > + for (i = 0; i < args->op_count; i++) { > + obj = drm_gem_object_lookup(file, ops[i].handle); > + if (!obj) { > + ret = -ENOENT; > + goto err_ops; > + } > + > + ret = panfrost_gem_sync(obj, ops[i].type, > + ops[i].offset, ops[i].size); > + > + drm_gem_object_put(obj); > + > + if (ret) > + goto err_ops; > + } > + > +err_ops: > + kvfree(ops); > + > + return ret; > +} > + > int panfrost_unstable_ioctl_check(void) > { > if (!unstable_ioctls) > @@ -649,6 +699,7 @@ static const struct drm_ioctl_desc > panfrost_drm_driver_ioctls[] = { > 
PANFROST_IOCTL(SET_LABEL_BO, set_label_bo, DRM_RENDER_ALLOW), > PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW), > PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW), > + PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW), > }; > > static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev, > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c > b/drivers/gpu/drm/panfrost/panfrost_gem.c > index 4afd1a7f77d5..8231ae04f54c 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_gem.c > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c > @@ -489,6 +489,90 @@ panfrost_gem_set_label(struct drm_gem_object *obj, const > char *label) > kfree_const(old_label); > } > > +int > +panfrost_gem_sync(struct drm_gem_object *obj, u32 type, u32 offset, u32 size) > +{ > + struct panfrost_gem_object *bo = to_panfrost_bo(obj); > + struct drm_gem_shmem_object *shmem = &bo->base; > + const struct drm_device *dev = shmem->base.dev; > + struct sg_table *sgt; > + struct scatterlist *sgl; > + unsigned int count; > + > + /* Make sure the range is in bounds. */ > + if (offset + size < offset || offset + size > shmem->base.size) > + return -EINVAL; > + > + /* Disallow CPU-cache maintenance on imported buffers. */ > + if (drm_gem_is_imported(&shmem->base)) > + return -EINVAL; > + > + switch (type) { > + case PANFROST_BO_SYNC_CPU_CACHE_FLUSH: > + case PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: > + break; > + > + default: > + return -EINVAL; > + } > + > + /* Don't bother if it's WC-mapped */ > + if (shmem->map_wc) > + return 0; > + > + /* Nothing to do if the size is zero. 
*/ > + if (size == 0) > + return 0; > + > + sgt = drm_gem_shmem_get_pages_sgt(shmem); > + if (IS_ERR(sgt)) > + return PTR_ERR(sgt); > + > + for_each_sgtable_dma_sg(sgt, sgl, count) { > + if (size == 0) > + break; > + > + dma_addr_t paddr = sg_dma_address(sgl); > + size_t len = sg_dma_len(sgl); > + > + if (len <= offset) { > + offset -= len; > + continue; > + } > + > + paddr += offset; > + len -= offset; > + len = min_t(size_t, len, size); > + size -= len; > + offset = 0; > + > + /* It's unclear whether dma_sync_xxx() is the right API to do > CPU > + * cache maintenance given an IOMMU can register their own > + * implementation doing more than just CPU cache > flushes/invalidation, > + * and what we really care about here is CPU caches only, but > that's > + * the best we have that is both arch-agnostic and does at > least the > + * CPU cache maintenance on a <page,offset,size> tuple. > + * > + * Also, I wish we could do a single > + * > + * dma_sync_single_for_device(BIDIR) > + * > + * and get a flush+invalidate, but that's not how it's > implemented > + * in practice (at least on arm64), so we have to make it > + * > + * dma_sync_single_for_device(TO_DEVICE) > + * dma_sync_single_for_cpu(FROM_DEVICE) > + * > + * for the flush+invalidate case. 
> + */ > + dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE); > + if (type == PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE) > + dma_sync_single_for_cpu(dev->dev, paddr, len, > DMA_FROM_DEVICE); > + } > + > + return 0; > +} > + > void > panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char > *label) > { > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h > b/drivers/gpu/drm/panfrost/panfrost_gem.h > index 7fec20339354..d61ffe1f6841 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_gem.h > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h > @@ -151,6 +151,8 @@ int panfrost_gem_shrinker_init(struct drm_device *dev); > void panfrost_gem_shrinker_cleanup(struct drm_device *dev); > > void panfrost_gem_set_label(struct drm_gem_object *obj, const char *label); > +int panfrost_gem_sync(struct drm_gem_object *obj, u32 type, > + u32 offset, u32 size); > void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char > *label); > > #ifdef CONFIG_DEBUG_FS > diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h > index 0c59714ae42b..e194e087a0c8 100644 > --- a/include/uapi/drm/panfrost_drm.h > +++ b/include/uapi/drm/panfrost_drm.h > @@ -24,6 +24,7 @@ extern "C" { > #define DRM_PANFROST_SET_LABEL_BO 0x09 > #define DRM_PANFROST_JM_CTX_CREATE 0x0a > #define DRM_PANFROST_JM_CTX_DESTROY 0x0b > +#define DRM_PANFROST_SYNC_BO 0x0c > > #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + > DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) > #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + > DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) > @@ -35,6 +36,7 @@ extern "C" { > #define DRM_IOCTL_PANFROST_SET_LABEL_BO > DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, struct > drm_panfrost_set_label_bo) > #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + > DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create) > #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY 
DRM_IOWR(DRM_COMMAND_BASE + > DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy) > +#define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + > DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo) > > /* > * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module > @@ -308,6 +310,49 @@ struct drm_panfrost_set_label_bo { > __u64 label; > }; > > +/* Valid flags to pass to drm_panfrost_bo_sync_op */ > +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH 0 > +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE 1 > + > +/** > + * struct drm_panfrost_bo_sync_op - BO map sync op > + */ > +struct drm_panfrost_bo_sync_op { > + /** @handle: Handle of the buffer object to sync. */ > + __u32 handle; > + > + /** @type: Type of sync operation. */ > + __u32 type; > + > + /** > + * @offset: Offset into the BO at which the sync range starts. > + * > + * This will be rounded down to the nearest cache line as needed. > + */ > + __u32 offset; > + > + /** > + * @size: Size of the range to sync > + * > + * @size + @offset will be rounded up to the nearest cache line as > + * needed. > + */ > + __u32 size; > +}; > + > +/** > + * struct drm_panfrost_sync_bo - ioctl argument for syncing BO maps > + */ > +struct drm_panfrost_sync_bo { > + /** Array of struct drm_panfrost_bo_sync_op */ > + __u64 ops; > + > + /** Number of BO sync ops */ > + __u32 op_count; > + > + __u32 pad; > +}; > + > /* Definitions for coredump decoding in user space */ > #define PANFROSTDUMP_MAJOR 1 > #define PANFROSTDUMP_MINOR 0
