From: Loïc Molinari <[email protected]> Will be used by the UMD to optimize CPU accesses to buffers that are frequently read by the CPU, or on which the access pattern makes non-cacheable mappings inefficient.
Mapping buffers CPU-cached implies taking care of the CPU cache maintenance in the UMD, unless the GPU is IO coherent. v2: - Add more to the commit message - Tweak the doc - Make sure we sync the section of the BO pointing to the CS syncobj before we read its seqno Signed-off-by: Loïc Molinari <[email protected]> Signed-off-by: Boris Brezillon <[email protected]> --- drivers/gpu/drm/panthor/panthor_drv.c | 8 +++++++- drivers/gpu/drm/panthor/panthor_gem.c | 3 +++ drivers/gpu/drm/panthor/panthor_sched.c | 18 ++++++++++++++++-- include/uapi/drm/panthor_drm.h | 9 +++++++++ 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 9004d0ba0e45..af9b431255a4 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -900,7 +900,8 @@ static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data, return panthor_vm_pool_destroy_vm(pfile->vms, args->id); } -#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP +#define PANTHOR_BO_FLAGS (DRM_PANTHOR_BO_NO_MMAP | \ + DRM_PANTHOR_BO_WB_MMAP) static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, struct drm_file *file) @@ -919,6 +920,10 @@ static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, goto out_dev_exit; } + if ((args->flags & DRM_PANTHOR_BO_NO_MMAP) && + (args->flags & DRM_PANTHOR_BO_WB_MMAP)) + return -EINVAL; + if (args->exclusive_vm_id) { vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id); if (!vm) { @@ -1450,6 +1455,7 @@ static int panthor_ioctl_bo_query_info(struct drm_device *ddev, void *data, return -ENOENT; bo = to_panthor_bo(obj); + args->pad = 0; args->create_flags = bo->flags; diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 617e04134d30..a0ccc316e375 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -280,6 +280,9 @@ panthor_gem_create_with_handle(struct drm_file *file, bo = to_panthor_bo(&shmem->base); bo->flags = flags; + if (flags & DRM_PANTHOR_BO_WB_MMAP) + shmem->map_wc = false; + if (exclusive_vm) { bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm); drm_gem_object_get(bo->exclusive_vm_root_gem); diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 0cc9055f4ee5..5f9dba3a9d65 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -868,8 +868,11 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue struct iosys_map map; int ret; - if (queue->syncwait.kmap) - return queue->syncwait.kmap + queue->syncwait.offset; + if (queue->syncwait.kmap) { + bo = container_of(queue->syncwait.obj, + struct panthor_gem_object, base.base); + goto out_sync; + } bo = panthor_vm_get_bo_for_va(group->vm, queue->syncwait.gpu_va, @@ -886,6 +889,17 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) goto err_put_syncwait_obj; +out_sync: + /* Make sure the CPU caches are invalidated before the seqno is read. + * drm_gem_shmem_sync() is a NOP if wap_wc=false, so no need to check + * it here. + */ + drm_gem_shmem_sync(&bo->base, queue->syncwait.offset, + queue->syncwait.sync64 ? + sizeof(struct panthor_syncobj_64b) : + sizeof(struct panthor_syncobj_32b), + DRM_GEM_OBJECT_CPU_ACCESS | DRM_GEM_OBJECT_READ_ACCESS); + return queue->syncwait.kmap + queue->syncwait.offset; err_put_syncwait_obj: diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 54502286c8b1..e77d65e51c64 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -681,6 +681,15 @@ struct drm_panthor_vm_get_state { enum drm_panthor_bo_flags { /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ DRM_PANTHOR_BO_NO_MMAP = (1 << 0), + + /** + * @DRM_PANTHOR_BO_WB_MMAP: Force "Write-Back Cacheable" CPU mapping. + * + * CPU map the buffer object in userspace by forcing the "Write-Back + * Cacheable" cacheability attribute. The mapping otherwise uses the + * "Non-Cacheable" attribute if the GPU is not IO coherent. + */ + DRM_PANTHOR_BO_WB_MMAP = (1 << 1), }; /** -- 2.51.0
