From: Faith Ekstrand <[email protected]>

The new SYNC_BO ioctl will be used by the UMD to synchronize CPU-cached
mappings when the UMD can't do it directly (there is no usermode cache
maintenance instruction on Arm32).
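
For illustration, a UMD could drive the new ioctl roughly like this
(a minimal userspace sketch; fd, bo_handle and bo_size are assumed to
exist already, and drmIoctl() comes from libdrm):

    struct drm_panfrost_bo_sync_op op = {
            .handle = bo_handle,    /* GEM handle of a CPU-cached BO */
            .type = PANFROST_BO_SYNC_CPU_CACHE_FLUSH,
            .offset = 0,
            .size = bo_size,
    };
    struct drm_panfrost_sync_bo args = {
            .ops = (__u64)(uintptr_t)&op,
            .op_count = 1,
            .pad = 0,
    };

    /* Flush CPU caches so the GPU sees CPU-written data. */
    int ret = drmIoctl(fd, DRM_IOCTL_PANFROST_SYNC_BO, &args);

PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE would be used instead
when the CPU is about to read back data written by the GPU.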

v2:
- Add more to the commit message
- Change the flags to better match the drm_gem_shmem_sync semantics

v3:
- Add Steve's R-b

v4:
- No changes

v5:
- Drop Steve's R-b (semantics changes requiring a new review)

v6:
- Bail out early in panfrost_ioctl_sync_bo() if op_count is zero

v7:
- Hand-roll our own bo_sync() helper

Signed-off-by: Faith Ekstrand <[email protected]>
Signed-off-by: Boris Brezillon <[email protected]>
---
 drivers/gpu/drm/panfrost/panfrost_drv.c | 51 +++++++++++++++
 drivers/gpu/drm/panfrost/panfrost_gem.c | 84 +++++++++++++++++++++++++
 drivers/gpu/drm/panfrost/panfrost_gem.h |  2 +
 include/uapi/drm/panfrost_drm.h         | 45 +++++++++++++
 4 files changed, 182 insertions(+)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index d650cd138dad..77b0ae5ef000 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -580,6 +580,56 @@ static int panfrost_ioctl_jm_ctx_destroy(struct drm_device *dev, void *data,
        return panfrost_jm_ctx_destroy(file, args->handle);
 }
 
+static int panfrost_ioctl_sync_bo(struct drm_device *ddev, void *data,
+                                 struct drm_file *file)
+{
+       struct drm_panfrost_sync_bo *args = data;
+       struct drm_panfrost_bo_sync_op *ops;
+       struct drm_gem_object *obj;
+       int ret;
+       u32 i;
+
+       if (args->pad)
+               return -EINVAL;
+
+       if (!args->op_count)
+               return 0;
+
+       ops = kvmalloc_array(args->op_count, sizeof(*ops), GFP_KERNEL);
+       if (!ops) {
+               DRM_DEBUG("Failed to allocate incoming BO sync ops array\n");
+               return -ENOMEM;
+       }
+
+       if (copy_from_user(ops, u64_to_user_ptr(args->ops),
+                          args->op_count * sizeof(*ops))) {
+               DRM_DEBUG("Failed to copy in BO sync ops\n");
+               ret = -EFAULT;
+               goto err_ops;
+       }
+
+       for (i = 0; i < args->op_count; i++) {
+               obj = drm_gem_object_lookup(file, ops[i].handle);
+               if (!obj) {
+                       ret = -ENOENT;
+                       goto err_ops;
+               }
+
+               ret = panfrost_gem_sync(obj, ops[i].type,
+                                       ops[i].offset, ops[i].size);
+
+               drm_gem_object_put(obj);
+
+               if (ret)
+                       goto err_ops;
+       }
+
+err_ops:
+       kvfree(ops);
+
+       return ret;
+}
+
 int panfrost_unstable_ioctl_check(void)
 {
        if (!unstable_ioctls)
@@ -649,6 +699,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
        PANFROST_IOCTL(SET_LABEL_BO,    set_label_bo,   DRM_RENDER_ALLOW),
        PANFROST_IOCTL(JM_CTX_CREATE,   jm_ctx_create,  DRM_RENDER_ALLOW),
        PANFROST_IOCTL(JM_CTX_DESTROY,  jm_ctx_destroy, DRM_RENDER_ALLOW),
+       PANFROST_IOCTL(SYNC_BO,         sync_bo,        DRM_RENDER_ALLOW),
 };
 
 static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index 4afd1a7f77d5..8231ae04f54c 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -489,6 +489,90 @@ panfrost_gem_set_label(struct drm_gem_object *obj, const char *label)
        kfree_const(old_label);
 }
 
+int
+panfrost_gem_sync(struct drm_gem_object *obj, u32 type, u32 offset, u32 size)
+{
+       struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+       struct drm_gem_shmem_object *shmem = &bo->base;
+       const struct drm_device *dev = shmem->base.dev;
+       struct sg_table *sgt;
+       struct scatterlist *sgl;
+       unsigned int count;
+
+       /* Make sure the range is in bounds. */
+       if (offset + size < offset || offset + size > shmem->base.size)
+               return -EINVAL;
+
+       /* Disallow CPU-cache maintenance on imported buffers. */
+       if (drm_gem_is_imported(&shmem->base))
+               return -EINVAL;
+
+       switch (type) {
+       case PANFROST_BO_SYNC_CPU_CACHE_FLUSH:
+       case PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE:
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       /* Don't bother if it's WC-mapped */
+       if (shmem->map_wc)
+               return 0;
+
+       /* Nothing to do if the size is zero. */
+       if (size == 0)
+               return 0;
+
+       sgt = drm_gem_shmem_get_pages_sgt(shmem);
+       if (IS_ERR(sgt))
+               return PTR_ERR(sgt);
+
+       for_each_sgtable_dma_sg(sgt, sgl, count) {
+               dma_addr_t paddr = sg_dma_address(sgl);
+               size_t len = sg_dma_len(sgl);
+
+               if (size == 0)
+                       break;
+
+               if (len <= offset) {
+                       offset -= len;
+                       continue;
+               }
+
+               paddr += offset;
+               len -= offset;
+               len = min_t(size_t, len, size);
+               size -= len;
+               offset = 0;
+
+               /* It's unclear whether dma_sync_xxx() is the right API to do CPU
+                * cache maintenance given an IOMMU can register its own
+                * implementation doing more than just CPU cache flushes/invalidation,
+                * and what we really care about here is CPU caches only, but that's
+                * the best we have that is both arch-agnostic and does at least the
+                * CPU cache maintenance on a <page,offset,size> tuple.
+                *
+                * Also, I wish we could do a single
+                *
+                *      dma_sync_single_for_device(BIDIR)
+                *
+                * and get a flush+invalidate, but that's not how it's implemented
+                * in practice (at least on arm64), so we have to make it
+                *
+                *      dma_sync_single_for_device(TO_DEVICE)
+                *      dma_sync_single_for_cpu(FROM_DEVICE)
+                *
+                * for the flush+invalidate case.
+                */
+               dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE);
+               if (type == PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE)
+                       dma_sync_single_for_cpu(dev->dev, paddr, len, DMA_FROM_DEVICE);
+       }
+
+       return 0;
+}
+
 void
 panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label)
 {
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
index 7fec20339354..d61ffe1f6841 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -151,6 +151,8 @@ int panfrost_gem_shrinker_init(struct drm_device *dev);
 void panfrost_gem_shrinker_cleanup(struct drm_device *dev);
 
 void panfrost_gem_set_label(struct drm_gem_object *obj, const char *label);
+int panfrost_gem_sync(struct drm_gem_object *obj, u32 type,
+                     u32 offset, u32 size);
 void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label);
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index 0c59714ae42b..e194e087a0c8 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -24,6 +24,7 @@ extern "C" {
 #define DRM_PANFROST_SET_LABEL_BO              0x09
 #define DRM_PANFROST_JM_CTX_CREATE             0x0a
 #define DRM_PANFROST_JM_CTX_DESTROY            0x0b
+#define DRM_PANFROST_SYNC_BO                   0x0c
 
 #define DRM_IOCTL_PANFROST_SUBMIT              DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit)
 #define DRM_IOCTL_PANFROST_WAIT_BO             DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo)
@@ -35,6 +36,7 @@ extern "C" {
 #define DRM_IOCTL_PANFROST_SET_LABEL_BO                DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, struct drm_panfrost_set_label_bo)
 #define DRM_IOCTL_PANFROST_JM_CTX_CREATE       DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create)
 #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY      DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy)
+#define DRM_IOCTL_PANFROST_SYNC_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo)
 
 /*
  * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module
@@ -308,6 +310,49 @@ struct drm_panfrost_set_label_bo {
        __u64 label;
 };
 
+/* Valid values for drm_panfrost_bo_sync_op::type */
+#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH                       0
+#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE                1
+
+/**
+ * struct drm_panfrost_bo_sync_op - BO sync operation
+ */
+struct drm_panfrost_bo_sync_op {
+       /** @handle: Handle of the buffer object to sync. */
+       __u32 handle;
+
+       /** @type: Type of sync operation. */
+       __u32 type;
+
+       /**
+        * @offset: Offset into the BO at which the sync range starts.
+        *
+        * This will be rounded down to the nearest cache line as needed.
+        */
+       __u32 offset;
+
+       /**
+        * @size: Size of the range to sync
+        *
+        * @size + @offset will be rounded up to the nearest cache line as
+        * needed.
+        */
+       __u32 size;
+};
+
+/**
+ * struct drm_panfrost_sync_bo - ioctl argument for syncing BO maps
+ */
+struct drm_panfrost_sync_bo {
+       /** @ops: Array of struct drm_panfrost_bo_sync_op. */
+       __u64 ops;
+
+       /** @op_count: Number of BO sync ops. */
+       __u32 op_count;
+       /** @pad: Must be zero. */
+       __u32 pad;
+};
+
 /* Definitions for coredump decoding in user space */
 #define PANFROSTDUMP_MAJOR 1
 #define PANFROSTDUMP_MINOR 0
-- 
2.51.1
