This change adds a new `hang_detect_then_reset` flag to the MES reset
queue input structure so that GFX queue resets are handled more robustly.

The change includes:
1. Adding the `hang_detect_then_reset` flag to `mes_reset_queue_input`
2. Setting this flag when resetting GFX queues in `mes_userq_reset`
3. Implementing the hang detection path in `mes_v11_0_reset_hw_queue`

Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h    | 1 +
 drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c     | 3 +++
 3 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index c0d2c195fe2e..31826a20c56b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -277,6 +277,7 @@ struct mes_reset_queue_input {
        uint64_t                           wptr_addr;
        uint32_t                           vmid;
        bool                               legacy_gfx;
+       bool                               hang_detect_then_reset;
        bool                               is_kq;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 650fdb68db12..2b5bd3691766 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -358,6 +358,8 @@ static int mes_userq_reset(struct amdgpu_userq_mgr *uq_mgr,
 
        queue_input.doorbell_offset = queue->doorbell_index;
        queue_input.queue_type = queue->queue_type;
+       if (queue->queue_type == AMDGPU_RING_TYPE_GFX)
+               queue_input.hang_detect_then_reset = true;
 
        amdgpu_mes_lock(&adev->mes);
        r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index c9eba537de09..25ea06b507c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -773,6 +773,9 @@ static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes,
                mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
                mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
                mes_reset_queue_pkt.vmid_id_lp = input->vmid;
+       } else if (input->hang_detect_then_reset) {
+               mes_reset_queue_pkt.hang_detect_then_reset = 1;
+               mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
        } else {
                mes_reset_queue_pkt.reset_queue_only = 1;
                mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
-- 
2.49.0

Reply via email to