No functional change for now, as we always use entity 0.

Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c     | 11 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 76 +++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h     | 10 +--
 5 files changed, 66 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 2a444d02cf4b..e73dcfed5338 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -655,7 +655,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, 
uint32_t vmid,
        struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
        struct dma_fence *fence;
        struct amdgpu_job *job;
-       int r;
+       int r, i;
 
        if (!hub->sdma_invalidation_workaround || vmid ||
            !adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
@@ -686,8 +686,9 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, 
uint32_t vmid,
         * translation. Avoid this by doing the invalidation from the SDMA
         * itself at least for GART.
         */
-       mutex_lock(&adev->mman.clear_entity.gart_window_lock);
        mutex_lock(&adev->mman.move_entity.gart_window_lock);
+       for (i = 0; i < adev->mman.num_clear_entities; i++)
+               mutex_lock(&adev->mman.clear_entities[i].gart_window_lock);
        r = amdgpu_job_alloc_with_ib(ring->adev, 
&adev->mman.default_entity.base,
                                     AMDGPU_FENCE_OWNER_UNDEFINED,
                                     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
@@ -701,7 +702,8 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, 
uint32_t vmid,
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        fence = amdgpu_job_submit(job);
        mutex_unlock(&adev->mman.move_entity.gart_window_lock);
-       mutex_unlock(&adev->mman.clear_entity.gart_window_lock);
+       for (i = 0; i < adev->mman.num_clear_entities; i++)
+               mutex_unlock(&adev->mman.clear_entities[i].gart_window_lock);
 
        dma_fence_wait(fence, false);
        dma_fence_put(fence);
@@ -710,7 +712,8 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, 
uint32_t vmid,
 
 error_alloc:
        mutex_unlock(&adev->mman.move_entity.gart_window_lock);
-       mutex_unlock(&adev->mman.clear_entity.gart_window_lock);
+       for (i = 0; i < adev->mman.num_clear_entities; i++)
+               mutex_unlock(&adev->mman.clear_entities[i].gart_window_lock);
        dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 0760e70402ec..3771e89035f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -269,10 +269,12 @@ static const struct ttm_resource_manager_func 
amdgpu_gtt_mgr_func = {
  *
  * @adev: amdgpu_device pointer
  * @gtt_size: maximum size of GTT
+ * @reserved_windows: number of already used windows
  *
  * Allocate and initialize the GTT manager.
  */
-int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size,
+                       u32 reserved_windows)
 {
        struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
        struct ttm_resource_manager *man = &mgr->manager;
@@ -283,7 +285,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, 
uint64_t gtt_size)
 
        ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
 
-       start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
+       start = AMDGPU_GTT_MAX_TRANSFER_SIZE * reserved_windows;
        size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
        drm_mm_init(&mgr->mm, start, size);
        spin_lock_init(&mgr->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index c06c132a753c..e7b2cae031b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1321,7 +1321,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
        if (r)
                goto out;
 
-       r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, 
&bo->base._resv,
+       r = amdgpu_fill_buffer(&adev->mman.clear_entities[0], abo, 0, 
&bo->base._resv,
                               &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
        if (WARN_ON(r))
                goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7193a341689d..2f305ad32080 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1891,6 +1891,7 @@ static void amdgpu_ttm_mmio_remap_bo_fini(struct 
amdgpu_device *adev)
 int amdgpu_ttm_init(struct amdgpu_device *adev)
 {
        uint64_t gtt_size;
+       u32 gart_window;
        int r;
 
        dma_set_max_seg_size(adev->dev, UINT_MAX);
@@ -1923,7 +1924,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
        }
 
        /* Change the size here instead of the init above so only lpfn is 
affected */
-       amdgpu_ttm_set_buffer_funcs_status(adev, false);
+       gart_window = amdgpu_ttm_set_buffer_funcs_status(adev, false);
 #ifdef CONFIG_64BIT
 #ifdef CONFIG_X86
        if (adev->gmc.xgmi.connected_to_cpu)
@@ -2019,7 +2020,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
        }
 
        /* Initialize GTT memory pool */
-       r = amdgpu_gtt_mgr_init(adev, gtt_size);
+       r = amdgpu_gtt_mgr_init(adev, gtt_size, gart_window);
        if (r) {
                dev_err(adev->dev, "Failed initializing GTT heap.\n");
                return r;
@@ -2158,16 +2159,22 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
  *
  * Enable/disable use of buffer functions during suspend/resume. This should
  * only be called at bootup or when userspace isn't running.
+ *
+ * Returns: the number of reserved GART windows
  */
-void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool 
enable)
+u32 amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 {
        struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, 
TTM_PL_VRAM);
        uint64_t size;
-       int r, i;
+       int r, i, j;
+       u32 num_clear_entities, windows, w;
+
+       num_clear_entities = adev->sdma.num_instances;
+       windows = adev->gmc.is_app_apu ? 0 : (2 + num_clear_entities);
 
        if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
            adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
-               return;
+               return windows;
 
        if (enable) {
                struct amdgpu_ring *ring;
@@ -2180,19 +2187,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
                                          1, NULL);
                if (r) {
                        dev_err(adev->dev,
-                               "Failed setting up TTM BO move entity (%d)\n",
+                               "Failed setting up TTM BO eviction entity 
(%d)\n",
                                r);
-                       return;
-               }
-
-               r = drm_sched_entity_init(&adev->mman.clear_entity.base,
-                                         DRM_SCHED_PRIORITY_NORMAL, &sched,
-                                         1, NULL);
-               if (r) {
-                       dev_err(adev->dev,
-                               "Failed setting up TTM BO clear entity (%d)\n",
-                               r);
-                       goto error_free_entity;
+                       return 0;
                }
 
                r = drm_sched_entity_init(&adev->mman.move_entity.base,
@@ -2202,26 +2199,51 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
                        dev_err(adev->dev,
                                "Failed setting up TTM BO move entity (%d)\n",
                                r);
-                       drm_sched_entity_destroy(&adev->mman.clear_entity.base);
                        goto error_free_entity;
                }
 
+               adev->mman.num_clear_entities = num_clear_entities;
+               adev->mman.clear_entities = kcalloc(num_clear_entities,
+                                                   sizeof(struct 
amdgpu_ttm_buffer_entity),
+                                                   GFP_KERNEL);
+               if (!adev->mman.clear_entities)
+                       goto error_free_entity;
+
+               for (i = 0; i < num_clear_entities; i++) {
+                       r = 
drm_sched_entity_init(&adev->mman.clear_entities[i].base,
+                                                 DRM_SCHED_PRIORITY_NORMAL, 
&sched,
+                                                 1, NULL);
+                       if (r) {
+                               for (j = 0; j < i; j++)
+                                       drm_sched_entity_destroy(
+                                               
&adev->mman.clear_entities[j].base);
+                               kfree(adev->mman.clear_entities);
+                               goto error_free_entity;
+                       }
+               }
+
                /* Statically assign GART windows to each entity. */
+               w = 0;
                mutex_init(&adev->mman.default_entity.gart_window_lock);
-               adev->mman.move_entity.gart_window_id0 = 0;
-               adev->mman.move_entity.gart_window_id1 = 1;
+               adev->mman.move_entity.gart_window_id0 = w++;
+               adev->mman.move_entity.gart_window_id1 = w++;
                mutex_init(&adev->mman.move_entity.gart_window_lock);
-               /* Clearing entity doesn't use id0 */
-               adev->mman.clear_entity.gart_window_id1 = 2;
-               mutex_init(&adev->mman.clear_entity.gart_window_lock);
+               for (i = 0; i < num_clear_entities; i++) {
+                       /* Clearing entities don't use id0 */
+                       adev->mman.clear_entities[i].gart_window_id1 = w++;
+                       
mutex_init(&adev->mman.clear_entities[i].gart_window_lock);
+               }
+               WARN_ON(w != windows);
        } else {
                drm_sched_entity_destroy(&adev->mman.default_entity.base);
-               drm_sched_entity_destroy(&adev->mman.clear_entity.base);
                drm_sched_entity_destroy(&adev->mman.move_entity.base);
+               for (i = 0; i < num_clear_entities; i++)
+                       
drm_sched_entity_destroy(&adev->mman.clear_entities[i].base);
                for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
                        dma_fence_put(man->eviction_fences[i]);
                        man->eviction_fences[i] = NULL;
                }
+               kfree(adev->mman.clear_entities);
        }
 
        /* this just adjusts TTM size idea, which sets lpfn to the correct 
value */
@@ -2232,10 +2254,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
        man->size = size;
        adev->mman.buffer_funcs_enabled = enable;
 
-       return;
+       return windows;
 
 error_free_entity:
        drm_sched_entity_destroy(&adev->mman.default_entity.base);
+       return 0;
 }
 
 static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
@@ -2388,8 +2411,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
 
        if (!fence)
                return -EINVAL;
-
-       entity = &adev->mman.clear_entity;
+       entity = &adev->mman.clear_entities[0];
        *fence = dma_fence_get_stub();
 
        amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index f4f762be9fdd..797f851a4578 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -39,7 +39,6 @@
 #define __AMDGPU_PL_NUM        (TTM_PL_PRIV + 6)
 
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE   512
-#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS        3
 
 extern const struct attribute_group amdgpu_vram_mgr_attr_group;
 extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
@@ -73,8 +72,9 @@ struct amdgpu_mman {
        struct mutex                            gtt_window_lock;
 
        struct amdgpu_ttm_buffer_entity default_entity; /* has no gart windows 
*/
-       struct amdgpu_ttm_buffer_entity clear_entity;
        struct amdgpu_ttm_buffer_entity move_entity;
+       struct amdgpu_ttm_buffer_entity *clear_entities;
+       u32 num_clear_entities;
 
        struct amdgpu_vram_mgr vram_mgr;
        struct amdgpu_gtt_mgr gtt_mgr;
@@ -134,7 +134,7 @@ struct amdgpu_copy_mem {
 #define AMDGPU_COPY_FLAGS_GET(value, field) \
        (((__u32)(value) >> AMDGPU_COPY_FLAGS_##field##_SHIFT) & 
AMDGPU_COPY_FLAGS_##field##_MASK)
 
-int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size, u32 
reserved_windows);
 void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
 int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
 void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
@@ -168,8 +168,8 @@ bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
 
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
-void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
-                                       bool enable);
+u32 amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
+                                      bool enable);
 int amdgpu_copy_buffer(struct amdgpu_ring *ring,
                       struct drm_sched_entity *entity,
                       uint64_t src_offset,
-- 
2.43.0

Reply via email to