From: Faith Ekstrand <[email protected]> This will be used by the UMD to synchronize CPU-cached mappings when the UMD can't do it directly (no usermode cache maintenance instruction on Arm32).
v2: - Add more to the commit message - Change the flags to better match the drm_gem_shmem_sync semantics v3: - Add Steve's R-b v4: - No changes v5: - Drop Steve's R-b (semantics changes requiring a new review) v6: - Bail out early in panfrost_ioctl_sync_bo() if op_count is zero v7: - Hand-roll our own bo_sync() helper Signed-off-by: Faith Ekstrand <[email protected]> Signed-off-by: Boris Brezillon <[email protected]> --- drivers/gpu/drm/panfrost/panfrost_drv.c | 51 +++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.c | 84 +++++++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.h | 2 + include/uapi/drm/panfrost_drm.h | 45 +++++++++++++ 4 files changed, 182 insertions(+) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index d650cd138dad..77b0ae5ef000 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -580,6 +580,56 @@ static int panfrost_ioctl_jm_ctx_destroy(struct drm_device *dev, void *data, return panfrost_jm_ctx_destroy(file, args->handle); } +static int panfrost_ioctl_sync_bo(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panfrost_sync_bo *args = data; + struct drm_panfrost_bo_sync_op *ops; + struct drm_gem_object *obj; + int ret; + u32 i; + + if (args->pad) + return -EINVAL; + + if (!args->op_count) + return 0; + + ops = kvmalloc_array(args->op_count, sizeof(*ops), GFP_KERNEL); + if (!ops) { + DRM_DEBUG("Failed to allocate incoming BO sync ops array\n"); + return -ENOMEM; + } + + if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops, + args->op_count * sizeof(*ops))) { + DRM_DEBUG("Failed to copy in BO sync ops\n"); + ret = -EFAULT; + goto err_ops; + } + + for (i = 0; i < args->op_count; i++) { + obj = drm_gem_object_lookup(file, ops[i].handle); + if (!obj) { + ret = -ENOENT; + goto err_ops; + } + + ret = panfrost_gem_sync(obj, ops[i].type, + ops[i].offset, ops[i].size); + + drm_gem_object_put(obj); + + if (ret) + goto err_ops; + } + +err_ops: + kvfree(ops); + + return ret; +} + int panfrost_unstable_ioctl_check(void) { if (!unstable_ioctls) @@ -649,6 +699,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { PANFROST_IOCTL(SET_LABEL_BO, set_label_bo, DRM_RENDER_ALLOW), PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW), PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW), + PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW), }; static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev, diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 4afd1a7f77d5..8231ae04f54c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -489,6 +489,90 @@ panfrost_gem_set_label(struct drm_gem_object *obj, const char *label) kfree_const(old_label); } +int +panfrost_gem_sync(struct drm_gem_object *obj, u32 type, u32 offset, u32 size) +{ + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + struct drm_gem_shmem_object *shmem = &bo->base; + const struct drm_device *dev = shmem->base.dev; + struct sg_table *sgt; + struct scatterlist *sgl; + unsigned int count; + + /* Make sure the range is in bounds. */ + if (offset + size < offset || offset + size > shmem->base.size) + return -EINVAL; + + /* Disallow CPU-cache maintenance on imported buffers. */ + if (drm_gem_is_imported(&shmem->base)) + return -EINVAL; + + switch (type) { + case PANFROST_BO_SYNC_CPU_CACHE_FLUSH: + case PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: + break; + + default: + return -EINVAL; + } + + /* Don't bother if it's WC-mapped */ + if (shmem->map_wc) + return 0; + + /* Nothing to do if the size is zero. */ + if (size == 0) + return 0; + + sgt = drm_gem_shmem_get_pages_sgt(shmem); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + for_each_sgtable_dma_sg(sgt, sgl, count) { + if (size == 0) + break; + + dma_addr_t paddr = sg_dma_address(sgl); + size_t len = sg_dma_len(sgl); + + if (len <= offset) { + offset -= len; + continue; + } + + paddr += offset; + len -= offset; + len = min_t(size_t, len, size); + size -= len; + offset = 0; + + /* It's unclear whether dma_sync_xxx() is the right API to do CPU + * cache maintenance given an IOMMU can register their own + * implementation doing more than just CPU cache flushes/invalidation, + * and what we really care about here is CPU caches only, but that's + * the best we have that is both arch-agnostic and does at least the + * CPU cache maintenance on a <page,offset,size> tuple. + * + * Also, I wish we could do a single + * + * dma_sync_single_for_device(BIDIR) + * + * and get a flush+invalidate, but that's not how it's implemented + * in practice (at least on arm64), so we have to make it + * + * dma_sync_single_for_device(TO_DEVICE) + * dma_sync_single_for_cpu(FROM_DEVICE) + * + * for the flush+invalidate case. + */ + dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE); + if (type == PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE) + dma_sync_single_for_cpu(dev->dev, paddr, len, DMA_FROM_DEVICE); + } + + return 0; +} + void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label) { diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 7fec20339354..d61ffe1f6841 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -151,6 +151,8 @@ int panfrost_gem_shrinker_init(struct drm_device *dev); void panfrost_gem_shrinker_cleanup(struct drm_device *dev); void panfrost_gem_set_label(struct drm_gem_object *obj, const char *label); +int panfrost_gem_sync(struct drm_gem_object *obj, u32 type, + u32 offset, u32 size); void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label); #ifdef CONFIG_DEBUG_FS diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 0c59714ae42b..e194e087a0c8 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -24,6 +24,7 @@ extern "C" { #define DRM_PANFROST_SET_LABEL_BO 0x09 #define DRM_PANFROST_JM_CTX_CREATE 0x0a #define DRM_PANFROST_JM_CTX_DESTROY 0x0b +#define DRM_PANFROST_SYNC_BO 0x0c #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -35,6 +36,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_SET_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, struct drm_panfrost_set_label_bo) #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create) #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy) +#define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -308,6 +310,49 @@ struct drm_panfrost_set_label_bo { __u64 label; }; +/* Valid flags to pass to drm_panfrost_bo_sync_op */ +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH 0 +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE 1 + +/** + * struct drm_panthor_bo_flush_map_op - BO map sync op + */ +struct drm_panfrost_bo_sync_op { + /** @handle: Handle of the buffer object to sync. */ + __u32 handle; + + /** @type: Type of sync operation. */ + __u32 type; + + /** + * @offset: Offset into the BO at which the sync range starts. + * + * This will be rounded down to the nearest cache line as needed. + */ + __u32 offset; + + /** + * @size: Size of the range to sync + * + * @size + @offset will be rounded up to the nearest cache line as + * needed. + */ + __u32 size; +}; + +/** + * struct drm_panfrost_sync_bo - ioctl argument for syncing BO maps + */ +struct drm_panfrost_sync_bo { + /** Array of struct drm_panfrost_bo_sync_op */ + __u64 ops; + + /** Number of BO sync ops */ + __u32 op_count; + + __u32 pad; +}; + /* Definitions for coredump decoding in user space */ #define PANFROSTDUMP_MAJOR 1 #define PANFROSTDUMP_MINOR 0 -- 2.51.1
