Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over
other queues on the same pipe. Multiple queues on a pipe are timesliced
so this gives us full precedence over other queues.

Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the
wave as follows:
        0x2: CS_H
        0x1: CS_M
        0x0: CS_L

The SPI block will then dispatch work according to the policy set by
SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than
gfx.

In order to prevent getting stuck in loops of CUs bouncing between GFX
and high priority compute, which would introduce further latency, we
reserve CUs 2 and up for high priority compute on demand. CUs 0 and 1
are left for general processing.

v2: fix srbm_select to ring->queue and use ring->funcs->type

Signed-off-by: Andres Rodriguez <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 96 +++++++++++++++++++++++++++++-
 3 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index accb885..3d13127 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -921,6 +921,9 @@ struct amdgpu_gfx {
        unsigned                        num_gfx_rings;
        struct amdgpu_ring              compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
        unsigned                        num_compute_rings;
+       spinlock_t                      cu_reserve_lock;
+       uint32_t                        cu_reserve_pipe_mask;
+       uint32_t                        cu_reserve_queue_mask[AMDGPU_MAX_COMPUTE_RINGS];
        struct amdgpu_irq_src           eop_irq;
        struct amdgpu_irq_src           priv_reg_irq;
        struct amdgpu_irq_src           priv_inst_irq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1fb1303..86d76e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1708,6 +1708,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        spin_lock_init(&adev->gc_cac_idx_lock);
        spin_lock_init(&adev->audio_endpt_idx_lock);
        spin_lock_init(&adev->mm_stats.lock);
+       spin_lock_init(&adev->gfx.cu_reserve_lock);
 
        INIT_LIST_HEAD(&adev->shadow_list);
        mutex_init(&adev->shadow_list_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5db5bac..141c964 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -53,7 +53,10 @@
 
 #define GFX8_NUM_GFX_RINGS     1
 #define GFX8_MEC_HPD_SIZE 2048
-
+#define GFX8_CU_RESERVE_RESOURCES 0x45888
+#define GFX8_CU_NUM 8
+#define GFX8_UNRESERVED_CU_NUM 2
+#define GFX8_CU_RESERVE_PIPE_SHIFT 7
 
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
@@ -6674,6 +6677,96 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
        WDOORBELL32(ring->doorbell_index, ring->wptr);
 }
 
+/*
+ * Reserve or release CUs GFX8_UNRESERVED_CU_NUM..GFX8_CU_NUM-1 for the
+ * compute queue behind @ring; rings of any other type are ignored.
+ */
+static void gfx_v8_0_cu_reserve(struct amdgpu_device *adev,
+                               struct amdgpu_ring *ring, bool acquire)
+{
+       u32 queue_mask = 0, type_mask, resources, en = 0;
+       u32 *pipe_queues;
+       int i;
+
+       if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+               return;
+
+       spin_lock(&adev->gfx.cu_reserve_lock);
+       pipe_queues = &adev->gfx.cu_reserve_queue_mask[ring->pipe];
+       if (acquire)
+               *pipe_queues |= 1u << ring->queue;
+       else
+               *pipe_queues &= ~(1u << ring->queue);
+
+       /* a pipe stays reserved while any of its queues still holds one */
+       if (*pipe_queues)
+               adev->gfx.cu_reserve_pipe_mask |= 1u << ring->pipe;
+       else
+               adev->gfx.cu_reserve_pipe_mask &= ~(1u << ring->pipe);
+
+       /* compute pipe 0 starts at GFX8_CU_RESERVE_PIPE_SHIFT */
+       type_mask = adev->gfx.cu_reserve_pipe_mask <<
+                   GFX8_CU_RESERVE_PIPE_SHIFT;
+
+       /* HW only has one register for queue mask, so we collapse them */
+       for (i = 0; i < AMDGPU_MAX_COMPUTE_RINGS; i++)
+               queue_mask |= adev->gfx.cu_reserve_queue_mask[i];
+
+       /* every reserved CU gets the same values, so build them once */
+       resources = queue_mask ? GFX8_CU_RESERVE_RESOURCES : 0;
+       en = REG_SET_FIELD(en, SPI_RESOURCE_RESERVE_EN_CU_0,
+                          TYPE_MASK, type_mask);
+       en = REG_SET_FIELD(en, SPI_RESOURCE_RESERVE_EN_CU_0,
+                          QUEUE_MASK, queue_mask);
+       en = REG_SET_FIELD(en, SPI_RESOURCE_RESERVE_EN_CU_0,
+                          EN, !!queue_mask);
+
+       /* leave the first CUs for general processing */
+       for (i = GFX8_UNRESERVED_CU_NUM; i < GFX8_CU_NUM; i++) {
+               WREG32(mmSPI_RESOURCE_RESERVE_CU_0 + i, resources);
+               WREG32(mmSPI_RESOURCE_RESERVE_EN_CU_0 + i, en);
+       }
+
+       spin_unlock(&adev->gfx.cu_reserve_lock);
+}
+
+/* Program the HQD pipe/queue priority registers of @ring's queue. */
+static void gfx_v8_0_set_spi_priority(struct amdgpu_device *adev,
+                                     struct amdgpu_ring *ring,
+                                     int priority)
+{
+       bool high = (priority == AMDGPU_CTX_PRIORITY_HIGH);
+       bool known = high || priority == AMDGPU_CTX_PRIORITY_NORMAL;
+
+       /* select this ring's me/pipe/queue before touching HQD registers */
+       mutex_lock(&adev->srbm_mutex);
+       vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+       if (known) {
+               /* pipe priority 0x2 maps to CS_H, 0x0 to CS_L */
+               WREG32(mmCP_HQD_PIPE_PRIORITY, high ? 0x2 : 0x0);
+               WREG32(mmCP_HQD_QUEUE_PRIORITY, high ? 0xf : 0x0);
+       } else {
+               WARN(1, "Attempt to set invalid SPI priority for ring:%d\n",
+                               ring->idx);
+       }
+
+       /* switch the selection back to 0/0/0/0 before dropping the mutex */
+       vi_srbm_select(adev, 0, 0, 0, 0);
+       mutex_unlock(&adev->srbm_mutex);
+}
+static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
+                                              int priority)
+{
+       bool reserve_cus = (priority == AMDGPU_CTX_PRIORITY_HIGH);
+
+       /* rings of any other type are left untouched */
+       if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+               gfx_v8_0_set_spi_priority(ring->adev, ring, priority);
+               gfx_v8_0_cu_reserve(ring->adev, ring, reserve_cus);
+       }
+}
+
 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
@@ -7081,6 +7174,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
+       .set_priority = gfx_v8_0_ring_set_priority_compute,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to