-----Original Message-----
From: Christian König [mailto:[email protected]]
Sent: Tuesday, March 27, 2018 3:48 PM
To: Deng, Emily <[email protected]>; [email protected]
Cc: Liu, Monk <[email protected]>
Subject: Re: [PATCH] drm/amdgpu: fix a kcq hang issue for SRIOV
Am 27.03.2018 um 07:58 schrieb Emily Deng:
issue:
the VM flush in KCQ can be preempted (unlike the GFX ring, which doesn't
allow preemption in the ring buffer), and this leads to a VM flush failure
when there is a world switch during the VM flush procedure (between writing
the invalidate request and querying the invalidate ack)
fix:
separate the VM flush for the gfx and compute rings, and use the new-format
command in the compute ring's VM flush, which uses only one packet, so no
preemption is allowed in between
NAK. As already discussed multiple times now, that only circumvents the
problem but does not really fix it.
Just executing the "amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req +
eng, req);" multiple times has the same effect and we need to figure out why.
Regards,
Christian.
Signed-off-by: Monk Liu <[email protected]>
Signed-off-by: Emily Deng <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +++++++++-
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 18 +++++++++++++-----
4 files changed, 25 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a7e2229..986659f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1790,6 +1790,7 @@ amdgpu_get_sdma_instance(struct
amdgpu_ring *ring)
#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m)
(r)->funcs->emit_reg_wait((r), (d), (v), (m))
+#define amdgpu_ring_emit_reg_wait1(r, d0, d1, v, m)
+(r)->funcs->emit_reg_wait1((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r)
(r)->funcs->init_cond_exec((r)) diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1d0d250..d85df5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -152,6 +152,8 @@ struct amdgpu_ring_funcs {
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t
val);
void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask);
+ void (*emit_reg_wait1)(struct amdgpu_ring *ring, uint32_t reg0,
+ uint32_t reg1, uint32_t val, uint32_t mask);
void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
/* priority functions */
void (*set_priority) (struct amdgpu_ring *ring, diff --git
a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1ae3de1..509c9d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4078,6 +4078,13 @@ static void gfx_v9_0_ring_emit_reg_wait(struct
amdgpu_ring *ring, uint32_t reg,
gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
+static void gfx_v9_0_ring_emit_reg_wait_compute(struct amdgpu_ring
*ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v9_0_wait_reg_mem(ring, 0, 0, 1, reg0, reg1, val, mask, 0x20); }
+
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device
*adev,
enum
amdgpu_interrupt_state state)
{
@@ -4415,7 +4422,7 @@ static const struct amdgpu_ring_funcs
gfx_v9_0_ring_funcs_compute = {
7 + /* gfx_v9_0_ring_emit_hdp_flush */
5 + /* hdp invalidate */
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ (SOC15_FLUSH_GPU_TLB_NUM_WREG - 1) * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm
fence
*/ @@ -4433,6 +4440,7 @@ static const struct amdgpu_ring_funcs
gfx_v9_0_ring_funcs_compute = {
.set_priority = gfx_v9_0_ring_set_priority_compute,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_wait1 = gfx_v9_0_ring_emit_reg_wait_compute,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e687363..968447d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -385,11 +385,19 @@ static uint64_t
gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
upper_32_bits(pd_addr));
- amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
- /* wait for the invalidate to complete */
- amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
- 1 << vmid, 1 << vmid);
+ /* A world switch must not be allowed to occur while
+ some invalidation controller code is waiting for an ack.
+ To work around this hardware restriction, replace the original
+ two commands with one command for the compute ring */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
amdgpu_sriov_vf(adev)) {
+ amdgpu_ring_emit_reg_wait1(ring, hub->vm_inv_eng0_req
+ eng,
+ hub->vm_inv_eng0_ack + eng, req, 1 <<
vmid);
+ } else {
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng,
req);
+ /* wait for the invalidate to complete */
+ amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack +
eng,
+ 1 << vmid, 1 << vmid);
+ }
return pd_addr;
}