This will be used by the UMD to synchronize CPU-cached mappings when
the UMD can't do it directly (no usermode cache maintenance instruction
on Arm32).
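
For illustration, a minimal sketch of how a UMD could use the new ioctl
after writing through a CPU-cached mapping (userspace side; it assumes
libdrm's drmIoctl() and the uAPI added below, and the helper name, fd,
handle, offset and size values are placeholders):

  #include <stdint.h>
  #include <xf86drm.h>
  #include "panthor_drm.h"

  /* Hypothetical helper: flush the CPU caches for a written range of a
   * cached BO mapping so the GPU sees the data.
   */
  static int bo_flush_cpu_cache(int fd, uint32_t handle,
                                uint64_t offset, uint64_t size)
  {
          struct drm_panthor_bo_sync_op op = {
                  .handle = handle,
                  .type = DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH,
                  .offset = offset,
                  .size = size,
          };
          struct drm_panthor_bo_sync req = {
                  .ops = {
                          .stride = sizeof(op),
                          .count = 1,
                          .array = (uint64_t)(uintptr_t)&op,
                  },
          };

          return drmIoctl(fd, DRM_IOCTL_PANTHOR_BO_SYNC, &req);
  }

A FLUSH_AND_INVALIDATE op would be used instead when the CPU needs to
read back data written by the GPU through the same cached mapping.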

v2:
- Change the flags so they better match the drm_gem_shmem_sync()
  semantics

v3:
- Add Steve's R-b

v4:
- No changes

v5:
- Drop Steve's R-b (the semantics changes call for a new review)

v6:
- Drop ret initialization in panthor_ioctl_bo_sync()
- Bail out early in panthor_ioctl_bo_sync() if ops.count is zero
- Drop unused PANTHOR_BO_SYNC_OP_FLAGS definition

v7:
- Hand-roll the sync logic (was previously provided by gem_shmem)

Signed-off-by: Faith Ekstrand <[email protected]>
Signed-off-by: Boris Brezillon <[email protected]>
---
 drivers/gpu/drm/panthor/panthor_drv.c | 41 ++++++++++++-
 drivers/gpu/drm/panthor/panthor_gem.c | 85 +++++++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_gem.h |  2 +
 include/uapi/drm/panthor_drm.h        | 52 ++++++++++++++++
 4 files changed, 179 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index d12ac4cb0ac4..cab19621917f 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -177,7 +177,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
                 PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \
                 PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \
                 PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \
-                PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs))
+                PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs), \
+                PANTHOR_UOBJ_DECL(struct drm_panthor_bo_sync_op, size))
 
 /**
  * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object.
@@ -1396,6 +1397,43 @@ static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev,
        return 0;
 }
 
+static int panthor_ioctl_bo_sync(struct drm_device *ddev, void *data,
+                                struct drm_file *file)
+{
+       struct drm_panthor_bo_sync *args = data;
+       struct drm_panthor_bo_sync_op *ops;
+       struct drm_gem_object *obj;
+       int ret;
+
+       if (!args->ops.count)
+               return 0;
+
+       ret = PANTHOR_UOBJ_GET_ARRAY(ops, &args->ops);
+       if (ret)
+               return ret;
+
+       for (u32 i = 0; i < args->ops.count; i++) {
+               obj = drm_gem_object_lookup(file, ops[i].handle);
+               if (!obj) {
+                       ret = -ENOENT;
+                       goto err_ops;
+               }
+
+               ret = panthor_gem_sync(obj, ops[i].type, ops[i].offset,
+                                      ops[i].size);
+
+               drm_gem_object_put(obj);
+
+               if (ret)
+                       goto err_ops;
+       }
+
+err_ops:
+       kvfree(ops);
+
+       return ret;
+}
+
 static int
 panthor_open(struct drm_device *ddev, struct drm_file *file)
 {
@@ -1470,6 +1508,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
        PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW),
        PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW),
        PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW),
+       PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW),
 };
 
 static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
index 173d42d65000..4be32fc1732b 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.c
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -447,6 +447,91 @@ panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label)
        panthor_gem_bo_set_label(bo->obj, str);
 }
 
+int
+panthor_gem_sync(struct drm_gem_object *obj, u32 type,
+                u64 offset, u64 size)
+{
+       struct panthor_gem_object *bo = to_panthor_bo(obj);
+       struct drm_gem_shmem_object *shmem = &bo->base;
+       const struct drm_device *dev = shmem->base.dev;
+       struct sg_table *sgt;
+       struct scatterlist *sgl;
+       unsigned int count;
+
+       /* Make sure the range is in bounds. */
+       if (offset + size < offset || offset + size > shmem->base.size)
+               return -EINVAL;
+
+       /* Disallow CPU-cache maintenance on imported buffers. */
+       if (drm_gem_is_imported(&shmem->base))
+               return -EINVAL;
+
+       switch (type) {
+       case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH:
+       case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE:
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       /* Don't bother if it's WC-mapped */
+       if (shmem->map_wc)
+               return 0;
+
+       /* Nothing to do if the size is zero. */
+       if (size == 0)
+               return 0;
+
+       sgt = drm_gem_shmem_get_pages_sgt(shmem);
+       if (IS_ERR(sgt))
+               return PTR_ERR(sgt);
+
+       for_each_sgtable_dma_sg(sgt, sgl, count) {
+               if (size == 0)
+                       break;
+
+               dma_addr_t paddr = sg_dma_address(sgl);
+               size_t len = sg_dma_len(sgl);
+
+               if (len <= offset) {
+                       offset -= len;
+                       continue;
+               }
+
+               paddr += offset;
+               len -= offset;
+               len = min_t(size_t, len, size);
+               size -= len;
+               offset = 0;
+
+               /* It's unclear whether dma_sync_xxx() is the right API to do CPU
+                * cache maintenance, given an IOMMU can register its own
+                * implementation doing more than just CPU cache flushes/invalidation,
+                * and what we really care about here is CPU caches only. But that's
+                * the best we have that is both arch-agnostic and does at least the
+                * CPU cache maintenance on a <page,offset,size> tuple.
+                *
+                * Also, I wish we could do a single
+                *
+                *      dma_sync_single_for_device(BIDIR)
+                *
+                * and get a flush+invalidate, but that's not how it's implemented
+                * in practice (at least on arm64), so we have to make it
+                *
+                *      dma_sync_single_for_device(TO_DEVICE)
+                *      dma_sync_single_for_cpu(FROM_DEVICE)
+                *
+                * for the flush+invalidate case.
+                */
+               dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE);
+               if (type == DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE)
+                       dma_sync_single_for_cpu(dev->dev, paddr, len, DMA_FROM_DEVICE);
+       }
+
+       return 0;
+}
+
 #ifdef CONFIG_DEBUG_FS
 struct gem_size_totals {
        size_t size;
diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h
index 91d1880f8a5d..bbf9ae75c360 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.h
+++ b/drivers/gpu/drm/panthor/panthor_gem.h
@@ -146,6 +146,8 @@ panthor_gem_create_with_handle(struct drm_file *file,
 
 void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label);
 void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label);
+int panthor_gem_sync(struct drm_gem_object *obj,
+                    u32 type, u64 offset, u64 size);
 
 struct drm_gem_object *
 panthor_gem_prime_import(struct drm_device *dev,
diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index 28cf9e878db6..9f810305db6e 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -144,6 +144,9 @@ enum drm_panthor_ioctl_id {
         * pgoff_t size.
         */
        DRM_PANTHOR_SET_USER_MMIO_OFFSET,
+
+       /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */
+       DRM_PANTHOR_BO_SYNC,
 };
 
 /**
@@ -1073,6 +1076,53 @@ struct drm_panthor_set_user_mmio_offset {
        __u64 offset;
 };
 
+/**
+ * enum drm_panthor_bo_sync_op_type - BO sync type
+ */
+enum drm_panthor_bo_sync_op_type {
+       /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH: Flush CPU caches. */
+       DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH = 0,
+
+       /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: Flush and invalidate CPU caches. */
+       DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE = 1,
+};
+
+/**
+ * struct drm_panthor_bo_sync_op - BO map sync op
+ */
+struct drm_panthor_bo_sync_op {
+       /** @handle: Handle of the buffer object to sync. */
+       __u32 handle;
+
+       /** @type: Type of operation. */
+       __u32 type;
+
+       /**
+        * @offset: Offset into the BO at which the sync range starts.
+        *
+        * This will be rounded down to the nearest cache line as needed.
+        */
+       __u64 offset;
+
+       /**
+        * @size: Size of the range to sync.
+        *
+        * @size + @offset will be rounded up to the nearest cache line as
+        * needed.
+        */
+       __u64 size;
+};
+
+/**
+ * struct drm_panthor_bo_sync - BO map sync request
+ */
+struct drm_panthor_bo_sync {
+       /**
+        * @ops: Array of struct drm_panthor_bo_sync_op sync operations.
+        */
+       struct drm_panthor_obj_array ops;
+};
+
 /**
  * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number
  * @__access: Access type. Must be R, W or RW.
@@ -1119,6 +1169,8 @@ enum {
                DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label),
        DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET =
                DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset),
+       DRM_IOCTL_PANTHOR_BO_SYNC =
+               DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync),
 };
 
 #if defined(__cplusplus)
-- 
2.51.1
