From: Faith Ekstrand <[email protected]> Will be used by the UMD to optimize CPU accesses to buffers that are frequently read by the CPU, or on which the access pattern makes non-cacheable mappings inefficient.
Mapping buffers CPU-cached implies taking care of the CPU cache maintenance in the UMD, unless the GPU is IO coherent. v2: - Add more to the commit message v3: - No changes v4: - Fix the map_wc test in panfrost_ioctl_query_bo_info() v5: - Drop Steve's R-b (enough has changed to justify a new review) Signed-off-by: Faith Ekstrand <[email protected]> Signed-off-by: Boris Brezillon <[email protected]> --- drivers/gpu/drm/panfrost/panfrost_drv.c | 10 ++++++-- drivers/gpu/drm/panfrost/panfrost_gem.c | 33 +++++++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.h | 5 ++++ include/uapi/drm/panfrost_drm.h | 5 +++- 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index ba03a4420264..74b7dc75d88b 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -125,6 +125,10 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct return 0; } +#define PANFROST_BO_FLAGS (PANFROST_BO_NOEXEC | \ + PANFROST_BO_HEAP | \ + PANFROST_BO_WB_MMAP) + static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *file) { @@ -134,8 +138,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct panfrost_gem_mapping *mapping; int ret; - if (!args->size || args->pad || - (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) + if (!args->size || args->pad || (args->flags & ~PANFROST_BO_FLAGS)) return -EINVAL; /* Heaps should never be executable */ @@ -652,6 +655,9 @@ static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data, if (bo->is_heap) args->create_flags |= PANFROST_BO_HEAP; + + if (!bo->base.map_wc) + args->create_flags |= PANFROST_BO_WB_MMAP; } drm_gem_object_put(gem_obj); diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 05d3f8a6fa78..1c600939c17a 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -269,6 +269,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = { .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = drm_gem_shmem_object_mmap, + .export = drm_gem_prime_export, .status = panfrost_gem_status, .rss = panfrost_gem_rss, .vm_ops = &drm_gem_shmem_vm_ops, @@ -302,12 +303,42 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t return &obj->base.base; } +static bool +should_map_wc(struct panfrost_gem_object *bo) +{ + struct panfrost_device *pfdev = to_panfrost_device(bo->base.base.dev); + + /* We can't do uncached mappings if the device is coherent, + * because the zeroing done by the shmem layer at page allocation + * time happens on a cached mapping which isn't CPU-flushed (at least + * not on Arm64 where the flush is deferred to PTE setup time, and + * only done conditionally based on the mapping permissions). We can't + * rely on dma_map_sgtable()/dma_sync_sgtable_for_xxx() either to flush + * those, because they are NOPed if dma_dev_coherent() returns true. + */ + if (pfdev->coherent) + return false; + + /* Cached mappings are explicitly requested, so no write-combine. */ + if (bo->wb_mmap) + return false; + + /* The default is write-combine. */ + return true; +} + struct panfrost_gem_object * panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) { struct drm_gem_shmem_object *shmem; struct panfrost_gem_object *bo; + /* The heap buffer is not supposed to be CPU-visible, so don't allow + * WB_MMAP on those. + */ + if ((flags & PANFROST_BO_HEAP) && (flags & PANFROST_BO_WB_MMAP)) + return ERR_PTR(-EINVAL); + /* Round up heap allocations to 2MB to keep fault handling simple */ if (flags & PANFROST_BO_HEAP) size = roundup(size, SZ_2M); @@ -319,6 +350,8 @@ panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) bo = to_panfrost_bo(&shmem->base); bo->noexec = !!(flags & PANFROST_BO_NOEXEC); bo->is_heap = !!(flags & PANFROST_BO_HEAP); + bo->wb_mmap = !!(flags & PANFROST_BO_WB_MMAP); + bo->base.map_wc = should_map_wc(bo); return bo; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 87b918f30baa..d2d532b3007a 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -98,6 +98,11 @@ struct panfrost_gem_object { bool noexec :1; bool is_heap :1; + /* On coherent devices, this reflects the creation flags, not the true + * cacheability attribute of the mapping. + */ + bool wb_mmap :1; + #ifdef CONFIG_DEBUG_FS struct panfrost_gem_debugfs debugfs; #endif diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 743c79a38f1b..82f4e69bafb4 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -101,9 +101,12 @@ struct drm_panfrost_wait_bo { __s64 timeout_ns; /* absolute */ }; -/* Valid flags to pass to drm_panfrost_create_bo */ +/* Valid flags to pass to drm_panfrost_create_bo. + * PANFROST_BO_WB_MMAP can't be set if PANFROST_BO_HEAP is. + */ #define PANFROST_BO_NOEXEC 1 #define PANFROST_BO_HEAP 2 +#define PANFROST_BO_WB_MMAP 4 /** * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs. -- 2.51.0
