On 30/10/2025 14:05, Boris Brezillon wrote: > From: Faith Ekstrand <[email protected]> > > Will be used by the UMD to optimize CPU accesses to buffers > that are frequently read by the CPU, or on which the access > pattern makes non-cacheable mappings inefficient. > > Mapping buffers CPU-cached implies taking care of the CPU > cache maintenance in the UMD, unless the GPU is IO coherent. > > v2: > - Add more to the commit message > > v3: > - No changes > > v4: > - Fix the map_wc test in panfrost_ioctl_query_bo_info() > > v5: > - Drop Steve's R-b (enough has changed to justify a new review) > > Signed-off-by: Faith Ekstrand <[email protected]> > Signed-off-by: Boris Brezillon <[email protected]>
Reviewed-by: Steven Price <[email protected]> > --- > drivers/gpu/drm/panfrost/panfrost_drv.c | 10 ++++++-- > drivers/gpu/drm/panfrost/panfrost_gem.c | 33 +++++++++++++++++++++++++ > drivers/gpu/drm/panfrost/panfrost_gem.h | 5 ++++ > include/uapi/drm/panfrost_drm.h | 5 +++- > 4 files changed, 50 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c > b/drivers/gpu/drm/panfrost/panfrost_drv.c > index ba03a4420264..74b7dc75d88b 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c > @@ -125,6 +125,10 @@ static int panfrost_ioctl_get_param(struct drm_device > *ddev, void *data, struct > return 0; > } > > +#define PANFROST_BO_FLAGS (PANFROST_BO_NOEXEC | \ > + PANFROST_BO_HEAP | \ > + PANFROST_BO_WB_MMAP) > + > static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, > struct drm_file *file) > { > @@ -134,8 +138,7 @@ static int panfrost_ioctl_create_bo(struct drm_device > *dev, void *data, > struct panfrost_gem_mapping *mapping; > int ret; > > - if (!args->size || args->pad || > - (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) > + if (!args->size || args->pad || (args->flags & ~PANFROST_BO_FLAGS)) > return -EINVAL; > > /* Heaps should never be executable */ > @@ -652,6 +655,9 @@ static int panfrost_ioctl_query_bo_info(struct drm_device > *dev, void *data, > > if (bo->is_heap) > args->create_flags |= PANFROST_BO_HEAP; > + > + if (!bo->base.map_wc) > + args->create_flags |= PANFROST_BO_WB_MMAP; > } > > drm_gem_object_put(gem_obj); > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c > b/drivers/gpu/drm/panfrost/panfrost_gem.c > index 05d3f8a6fa78..1c600939c17a 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_gem.c > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c > @@ -269,6 +269,7 @@ static const struct drm_gem_object_funcs > panfrost_gem_funcs = { > .vmap = drm_gem_shmem_object_vmap, > .vunmap = drm_gem_shmem_object_vunmap, > .mmap = drm_gem_shmem_object_mmap, > + .export = drm_gem_prime_export, > .status = panfrost_gem_status, > .rss = panfrost_gem_rss, > .vm_ops = &drm_gem_shmem_vm_ops, > @@ -302,12 +303,42 @@ struct drm_gem_object > *panfrost_gem_create_object(struct drm_device *dev, size_t > return &obj->base.base; > } > > +static bool > +should_map_wc(struct panfrost_gem_object *bo) > +{ > + struct panfrost_device *pfdev = to_panfrost_device(bo->base.base.dev); > + > + /* We can't do uncached mappings if the device is coherent, > + * because the zeroing done by the shmem layer at page allocation > + * time happens on a cached mapping which isn't CPU-flushed (at least > + * not on Arm64 where the flush is deferred to PTE setup time, and > + * only done conditionally based on the mapping permissions). We can't > + * rely on dma_map_sgtable()/dma_sync_sgtable_for_xxx() either to flush > + * those, because they are NOPed if dma_dev_coherent() returns true. > + */ > + if (pfdev->coherent) > + return false; > + > + /* Cached mappings are explicitly requested, so no write-combine. */ > + if (bo->wb_mmap) > + return false; > + > + /* The default is write-combine. */ > + return true; > +} > + > struct panfrost_gem_object * > panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) > { > struct drm_gem_shmem_object *shmem; > struct panfrost_gem_object *bo; > > + /* The heap buffer is not supposed to be CPU-visible, so don't allow > + * WB_MMAP on those. > + */ > + if ((flags & PANFROST_BO_HEAP) && (flags & PANFROST_BO_WB_MMAP)) > + return ERR_PTR(-EINVAL); > + > /* Round up heap allocations to 2MB to keep fault handling simple */ > if (flags & PANFROST_BO_HEAP) > size = roundup(size, SZ_2M); > @@ -319,6 +350,8 @@ panfrost_gem_create(struct drm_device *dev, size_t size, > u32 flags) > bo = to_panfrost_bo(&shmem->base); > bo->noexec = !!(flags & PANFROST_BO_NOEXEC); > bo->is_heap = !!(flags & PANFROST_BO_HEAP); > + bo->wb_mmap = !!(flags & PANFROST_BO_WB_MMAP); > + bo->base.map_wc = should_map_wc(bo); > > return bo; > } > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h > b/drivers/gpu/drm/panfrost/panfrost_gem.h > index 87b918f30baa..d2d532b3007a 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_gem.h > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h > @@ -98,6 +98,11 @@ struct panfrost_gem_object { > bool noexec :1; > bool is_heap :1; > > + /* On coherent devices, this reflects the creation flags, not the true > + * cacheability attribute of the mapping. > + */ > + bool wb_mmap :1; > + > #ifdef CONFIG_DEBUG_FS > struct panfrost_gem_debugfs debugfs; > #endif > diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h > index 743c79a38f1b..82f4e69bafb4 100644 > --- a/include/uapi/drm/panfrost_drm.h > +++ b/include/uapi/drm/panfrost_drm.h > @@ -101,9 +101,12 @@ struct drm_panfrost_wait_bo { > __s64 timeout_ns; /* absolute */ > }; > > -/* Valid flags to pass to drm_panfrost_create_bo */ > +/* Valid flags to pass to drm_panfrost_create_bo. > + * PANFROST_BO_WB_MMAP can't be set if PANFROST_BO_HEAP is. > + */ > #define PANFROST_BO_NOEXEC 1 > #define PANFROST_BO_HEAP 2 > +#define PANFROST_BO_WB_MMAP 4 > > /** > * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs.
