On 10/10/2025 11:11, Boris Brezillon wrote:
> Will be needed if we want to skip CPU cache maintenance operations when
> the GPU can snoop CPU caches.
>
> v2:
> - New commit
>
> Signed-off-by: Boris Brezillon <[email protected]>
> ---
> drivers/gpu/drm/panfrost/panfrost_device.h | 1 +
> drivers/gpu/drm/panfrost/panfrost_drv.c | 1 +
> drivers/gpu/drm/panfrost/panfrost_gpu.c | 18 +++++++++++++++++-
> drivers/gpu/drm/panfrost/panfrost_regs.h | 2 ++
> include/uapi/drm/panfrost_drm.h | 7 +++++++
> 5 files changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h
> b/drivers/gpu/drm/panfrost/panfrost_device.h
> index 1e73efad02a8..bd38b7ae169e 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> @@ -75,6 +75,7 @@ struct panfrost_features {
> u32 thread_max_workgroup_sz;
> u32 thread_max_barrier_sz;
> u32 coherency_features;
> + u32 selected_coherency;
> u32 afbc_features;
> u32 texture_features[4];
> u32 js_features[16];
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c
> b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 607a5b8448d0..3ffcd08f7745 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -94,6 +94,7 @@ static int panfrost_ioctl_get_param(struct drm_device
> *ddev, void *data, struct
> PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15);
> PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups);
> PANFROST_FEATURE(THREAD_TLS_ALLOC, thread_tls_alloc);
> + PANFROST_FEATURE(SELECTED_COHERENCY, selected_coherency);
>
> case DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP:
> ret = panfrost_ioctl_query_timestamp(pfdev, &param->value);
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c
> b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> index 174e190ba40f..fed323e6a307 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> @@ -260,7 +260,23 @@ static void panfrost_gpu_init_features(struct
> panfrost_device *pfdev)
> pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS);
> pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev,
> GPU_THREAD_MAX_WORKGROUP_SIZE);
> pfdev->features.thread_max_barrier_sz = gpu_read(pfdev,
> GPU_THREAD_MAX_BARRIER_SIZE);
> - pfdev->features.coherency_features = gpu_read(pfdev,
> GPU_COHERENCY_FEATURES);
> +
> + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_COHERENCY_REG))
> + pfdev->features.coherency_features = gpu_read(pfdev,
> GPU_COHERENCY_FEATURES);
> + else
> + pfdev->features.coherency_features = COHERENCY_ACE_LITE;
> +
> + if (!pfdev->coherent) {
> + pfdev->features.selected_coherency = COHERENCY_NONE;
> + } else if (pfdev->features.coherency_features & COHERENCY_ACE) {
> + pfdev->features.selected_coherency = COHERENCY_ACE;
> + } else if (pfdev->features.coherency_features & COHERENCY_ACE_LITE) {
> + pfdev->features.selected_coherency = COHERENCY_ACE_LITE;
> + } else {
> + drm_WARN(pfdev->ddev, true, "No known coherency protocol
> supported");
> + pfdev->features.selected_coherency = COHERENCY_NONE;
> + }
Same comment as for panthor: there's no point using bit flags when only
one value can ever be selected at a time. Also, because selected_coherency
is purely a UAPI concept here, it would make sense to use the UAPI values,
i.e. DRM_PANFROST_GPU_COHERENCY_ACE_LITE etc., rather than the private
COHERENCY_ACE_LITE defines.

Note that there is actually a COHERENCY_ENABLE register on some GPUs
(BASE_HW_FEATURE_COHERENCY_REG in the kbase driver); it looks like it was
probably introduced for Bifrost. But AFAIK the above checks should be ok.

Thanks,
Steve
> +
> pfdev->features.afbc_features = gpu_read(pfdev, GPU_AFBC_FEATURES);
> for (i = 0; i < 4; i++)
> pfdev->features.texture_features[i] = gpu_read(pfdev,
> GPU_TEXTURE_FEATURES(i));
> diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h
> b/drivers/gpu/drm/panfrost/panfrost_regs.h
> index 2b8f1617b836..775ad88f7a86 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_regs.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
> @@ -103,8 +103,10 @@
> #define GPU_L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present
> bitmap, high word */
>
> #define GPU_COHERENCY_FEATURES 0x300 /* (RO) Coherency
> features present */
> +#define GPU_COHERENCY_ENABLE 0x304 /* (RW) Coherency protocol
> selection */
> #define COHERENCY_ACE_LITE BIT(0)
> #define COHERENCY_ACE BIT(1)
> +#define COHERENCY_NONE 31
>
> #define GPU_STACK_PRESENT_LO 0xE00 /* (RO) Core stack present
> bitmap, low word */
> #define GPU_STACK_PRESENT_HI 0xE04 /* (RO) Core stack present
> bitmap, high word */
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index e8b47c9f6976..d3f6c005463c 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -188,6 +188,13 @@ enum drm_panfrost_param {
> DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP,
> DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY,
> DRM_PANFROST_PARAM_ALLOWED_JM_CTX_PRIORITIES,
> + DRM_PANFROST_PARAM_SELECTED_COHERENCY,
> +};
> +
> +enum drm_panfrost_gpu_coherency {
> + DRM_PANFROST_GPU_COHERENCY_ACE_LITE = (1 << 0),
> + DRM_PANFROST_GPU_COHERENCY_ACE = (1 << 1),
> + DRM_PANFROST_GPU_COHERENCY_NONE = 31,
> };
>
> struct drm_panfrost_get_param {