On 30/10/2025 14:05, Boris Brezillon wrote:
> From: Faith Ekstrand <[email protected]>
> 
> This enables syncing mapped GEM objects between the CPU and GPU via calls
> to dma_sync_*().  It's a bit annoying as it requires walking the sg_table
> so it's best if every driver doesn't hand-roll it.
> 
> v2:
> - s/drm_gem_shmem_sync_mmap/drm_gem_shmem_sync/
> - Change the prototype to match drm_gem_object_funcs::sync()
> - Add a wrapper for drm_gem_object_funcs::sync()
> 
> v3:
> - No changes
> 
> v4:
> - Add Steve's R-b
> 
> v5:
> - Change the semantics of the drm_gem_shmem_sync() helper to better
>   reflect the UMD cache flush/flush+invalidate semantics (discussed
>   with Faith)
> - Drop R-bs
> 
> Signed-off-by: Faith Ekstrand <[email protected]>
> Signed-off-by: Boris Brezillon <[email protected]>

Reviewed-by: Steven Price <[email protected]>

> ---
>  drivers/gpu/drm/drm_gem_shmem_helper.c | 93 ++++++++++++++++++++++++++
>  include/drm/drm_gem_shmem_helper.h     | 14 ++++
>  2 files changed, 107 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c 
> b/drivers/gpu/drm/drm_gem_shmem_helper.c
> index e49c75739c20..d9266e22a0dc 100644
> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> @@ -690,6 +690,99 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object 
> *shmem, struct vm_area_struct
>  }
>  EXPORT_SYMBOL_GPL(drm_gem_shmem_mmap);
>  
> +/**
> + * drm_gem_shmem_sync - Sync CPU-mapped data
> + * @shmem: shmem GEM object
> + * @offset: Offset into the GEM object
> + * @size: Size of the area to sync
> + * @type: Type of synchronization
> + *
> + * Returns:
> + * 0 on success or a negative error code on failure.
> + */
> +int drm_gem_shmem_sync(struct drm_gem_shmem_object *shmem, size_t offset,
> +                    size_t size, enum drm_gem_shmem_sync_type type)
> +{
> +     const struct drm_device *dev = shmem->base.dev;
> +     struct sg_table *sgt;
> +     struct scatterlist *sgl;
> +     unsigned int count;
> +
> +     /* Make sure the range is in bounds. */
> +     if (offset + size < offset || offset + size > shmem->base.size)
> +             return -EINVAL;
> +
> +     /* Disallow CPU-cache maintenance on imported buffers. */
> +     if (drm_gem_is_imported(&shmem->base))
> +             return -EINVAL;
> +
> +     switch (type) {
> +     case DRM_GEM_SHMEM_SYNC_CPU_CACHE_FLUSH:
> +     case DRM_GEM_SHMEM_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE:
> +             break;
> +
> +     default:
> +             return -EINVAL;
> +     }
> +
> +     /* Don't bother if it's WC-mapped */
> +     if (shmem->map_wc)
> +             return 0;
> +
> +     /* Nothing to do if the size is zero. */
> +     if (size == 0)
> +             return 0;
> +
> +     sgt = drm_gem_shmem_get_pages_sgt(shmem);
> +     if (IS_ERR(sgt))
> +             return PTR_ERR(sgt);
> +
> +     for_each_sgtable_dma_sg(sgt, sgl, count) {
> +             if (size == 0)
> +                     break;
> +
> +             dma_addr_t paddr = sg_dma_address(sgl);
> +             size_t len = sg_dma_len(sgl);
> +
> +             if (len <= offset) {
> +                     offset -= len;
> +                     continue;
> +             }
> +
> +             paddr += offset;
> +             len -= offset;
> +             len = min_t(size_t, len, size);
> +             size -= len;
> +             offset = 0;
> +
> +             /* It's unclear whether dma_sync_xxx() is the right API to do 
> CPU
> +              * cache maintenance given an IOMMU can register their own
> +              * implementation doing more than just CPU cache 
> flushes/invalidation,
> +              * and what we really care about here is CPU caches only, but 
> that's
> +              * the best we have that is both arch-agnostic and does at 
> least the
> +              * CPU cache maintenance on a <page,offset,size> tuple.
> +              *
> +              * Also, I wish we could do a single
> +              *
> +              *      dma_sync_single_for_device(BIDIR)
> +              *
> +              * and get a flush+invalidate, but that's not how it's 
> implemented
> +              * in practice (at least on arm64), so we have to make it
> +              *
> +              *      dma_sync_single_for_device(TO_DEVICE)
> +              *      dma_sync_single_for_cpu(FROM_DEVICE)
> +              *
> +              * for the flush+invalidate case.
> +              */
> +             dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE);
> +             if (type == DRM_GEM_SHMEM_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE)
> +                     dma_sync_single_for_cpu(dev->dev, paddr, len, 
> DMA_FROM_DEVICE);
> +     }
> +
> +     return 0;
> +}
> +EXPORT_SYMBOL_GPL(drm_gem_shmem_sync);
> +
>  /**
>   * drm_gem_shmem_print_info() - Print &drm_gem_shmem_object info for debugfs
>   * @shmem: shmem GEM object
> diff --git a/include/drm/drm_gem_shmem_helper.h 
> b/include/drm/drm_gem_shmem_helper.h
> index 075275d6b2fd..b0b6d0104a9a 100644
> --- a/include/drm/drm_gem_shmem_helper.h
> +++ b/include/drm/drm_gem_shmem_helper.h
> @@ -124,6 +124,20 @@ void drm_gem_shmem_vunmap_locked(struct 
> drm_gem_shmem_object *shmem,
>                                struct iosys_map *map);
>  int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct 
> vm_area_struct *vma);
>  
> +/**
> + * enum enum drm_gem_shmem_sync_type - Type of synchronization
> + */
> +enum drm_gem_shmem_sync_type {
> +     /** DRM_GEM_SHMEM_SYNC_CPU_CACHE_FLUSH: Flush CPU caches */
> +     DRM_GEM_SHMEM_SYNC_CPU_CACHE_FLUSH = 0,
> +
> +     /** DRM_GEM_SHMEM_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: Flush and 
> invalidate CPU caches */
> +     DRM_GEM_SHMEM_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE,
> +};
> +
> +int drm_gem_shmem_sync(struct drm_gem_shmem_object *shmem, size_t offset,
> +                    size_t size, enum drm_gem_shmem_sync_type type);
> +
>  int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem);
>  void drm_gem_shmem_unpin_locked(struct drm_gem_shmem_object *shmem);
>  

Reply via email to