issue:
the vmflush in KCQ could be preempted (not like GFX ring
which doesn't allow preemption in ring buffer) and this lead
to vm flush fail when there is a world switch during
the vm flush procedure (between write invalidate request
and query invalidate ack)

fix:
separate vm flush for gfx and compute ring, and use
the new format command in compute's vm flush which
use only one package so no preemption could allowed

Signed-off-by: Monk Liu <[email protected]>
Signed-off-by: Emily Deng <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 10 +++++++++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 14 +++++++++-----
 4 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a7e2229..986659f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1790,6 +1790,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), 
(d), (v), (m))
+#define amdgpu_ring_emit_reg_wait1(r, d0, d1, v, m) 
(r)->funcs->emit_reg_wait1((r), (d0), (d1), (v), (m))
 #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1d0d250..d85df5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -152,6 +152,8 @@ struct amdgpu_ring_funcs {
        void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
        void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
                              uint32_t val, uint32_t mask);
+       void (*emit_reg_wait1)(struct amdgpu_ring *ring, uint32_t reg0,
+                               uint32_t reg1, uint32_t val, uint32_t mask);
        void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
        /* priority functions */
        void (*set_priority) (struct amdgpu_ring *ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d1d2c27..d36b29e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4014,6 +4014,13 @@ static void gfx_v9_0_ring_emit_reg_wait(struct 
amdgpu_ring *ring, uint32_t reg,
        gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 }
 
+static void gfx_v9_0_ring_emit_reg_wait_compute(struct amdgpu_ring *ring,
+                                       uint32_t reg0, uint32_t reg1,
+                                       uint32_t val, uint32_t mask)
+{
+       gfx_v9_0_wait_reg_mem(ring, 0, 0, 1, reg0, reg1, val, mask, 0x20);
+}
+
 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state 
state)
 {
@@ -4351,7 +4358,7 @@ static const struct amdgpu_ring_funcs 
gfx_v9_0_ring_funcs_compute = {
                7 + /* gfx_v9_0_ring_emit_hdp_flush */
                5 + /* hdp invalidate */
                7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-               SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+               (SOC15_FLUSH_GPU_TLB_NUM_WREG - 1) * 5 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
                2 + /* gfx_v9_0_ring_emit_vm_flush */
                8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm 
fence */
@@ -4369,6 +4376,7 @@ static const struct amdgpu_ring_funcs 
gfx_v9_0_ring_funcs_compute = {
        .set_priority = gfx_v9_0_ring_set_priority_compute,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+       .emit_reg_wait1 = gfx_v9_0_ring_emit_reg_wait_compute,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 517712b..60be1a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -380,7 +380,6 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev,
        }
 }
 
-
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
                                            unsigned vmid, uint64_t pd_addr)
 {
@@ -399,11 +398,16 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct 
amdgpu_ring *ring,
        amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
                              upper_32_bits(pd_addr));
 
-       amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
 
-       /* wait for the invalidate to complete */
-       amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-                                 1 << vmid, 1 << vmid);
+       if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+               amdgpu_ring_emit_reg_wait1(ring, hub->vm_inv_eng0_req + eng,
+                                  hub->vm_inv_eng0_ack + eng, req, 1 << vmid);
+       } else {
+               amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
+               /* wait for the invalidate to complete */
+               amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
+                          1 << vmid, 1 << vmid);
+       }
 
        return pd_addr;
 }
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to