From: Alex Deucher <[email protected]>

Replace the queue remove/add approach with suspend/resume semantics
for user queue preemption. This change:

1. Maintains queue scheduling registration while only preempting execution
   - The previously used remove_queue/add_queue path would fully deregister queues
   - The new suspend/resume approach keeps scheduler state intact while execution is preempted

2. Introduces proper preemption helpers (see the sketch below):
   - amdgpu_userqueue_preempt_helper(): Suspends queue execution
     - Transitions MAPPED→UNMAPPED on success
     - Marks the queue HUNG and runs hung-queue detection/reset on failure
   - amdgpu_userqueue_restore_helper(): Resumes queue execution
     - Transitions UNMAPPED→MAPPED on success
     - Marks the queue HUNG and triggers a GPU reset on failure
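
A minimal stand-alone sketch of the state machine the two helpers
implement, to make the transitions above concrete. The userq_state
enum, the hw_preempt()/hw_restore() stubs, and the preempt_helper()/
restore_helper() functions are illustrative stand-ins for the
AMDGPU_USERQ_STATE_* values, the userq_funcs->preempt()/restore()
callbacks, and the real helpers; none of this is actual driver API:

  #include <stdio.h>

  /* Simplified stand-ins for the AMDGPU_USERQ_STATE_* values. */
  enum userq_state { Q_MAPPED, Q_UNMAPPED, Q_HUNG };

  struct userq {
      enum userq_state state;
  };

  /* Stubs for the userq_funcs->preempt()/restore() hardware callbacks;
   * return 0 on success, nonzero on failure. */
  static int hw_preempt(struct userq *q) { (void)q; return 0; }
  static int hw_restore(struct userq *q) { (void)q; return 0; }

  /* Like amdgpu_userqueue_preempt_helper(): only a MAPPED queue is
   * touched; success parks it UNMAPPED, failure marks it HUNG (the
   * driver additionally runs hung-queue detection and reset). */
  static int preempt_helper(struct userq *q)
  {
      int r = 0;

      if (q->state == Q_MAPPED) {
          r = hw_preempt(q);
          q->state = r ? Q_HUNG : Q_UNMAPPED;
      }
      return r;
  }

  /* Like amdgpu_userqueue_restore_helper(): only an UNMAPPED queue is
   * resumed; failure marks it HUNG (the driver then resets the GPU). */
  static int restore_helper(struct userq *q)
  {
      int r = 0;

      if (q->state == Q_UNMAPPED) {
          r = hw_restore(q);
          q->state = r ? Q_HUNG : Q_MAPPED;
      }
      return r;
  }

  int main(void)
  {
      struct userq q = { .state = Q_MAPPED };

      preempt_helper(&q);        /* MAPPED -> UNMAPPED */
      restore_helper(&q);        /* UNMAPPED -> MAPPED */
      printf("final state: %d\n", q.state); /* prints 0 (Q_MAPPED) */
      return 0;
  }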

Signed-off-by: Alex Deucher <[email protected]>
Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 53 +++++++++++++++++++++--
 1 file changed, 50 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 0c91302162fa..af0ac4b73ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -185,6 +185,54 @@ amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
        return r;
 }
 
+static int
+amdgpu_userqueue_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
+                         struct amdgpu_usermode_queue *queue)
+{
+       struct amdgpu_device *adev = uq_mgr->adev;
+       const struct amdgpu_userq_funcs *userq_funcs =
+               adev->userq_funcs[queue->queue_type];
+       int r = 0;
+
+       if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+               r = userq_funcs->preempt(uq_mgr, queue);
+               if (r) {
+                       amdgpu_userq_detect_and_reset_queues(uq_mgr);
+                       queue->state = AMDGPU_USERQ_STATE_HUNG;
+               } else {
+                       queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
+               }
+       }
+
+       return r;
+}
+
+static int
+amdgpu_userqueue_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
+                       struct amdgpu_usermode_queue *queue)
+{
+       struct amdgpu_device *adev = uq_mgr->adev;
+       const struct amdgpu_userq_funcs *userq_funcs =
+               adev->userq_funcs[queue->queue_type];
+       bool gpu_reset = false;
+       int r = 0;
+
+       if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
+               r = userq_funcs->restore(uq_mgr, queue);
+               if (r) {
+                       queue->state = AMDGPU_USERQ_STATE_HUNG;
+                       gpu_reset = true;
+               } else {
+                       queue->state = AMDGPU_USERQ_STATE_MAPPED;
+               }
+       }
+
+       if (gpu_reset)
+               amdgpu_userq_gpu_reset(adev);
+
+       return r;
+}
+
 static void
 amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
                                 struct amdgpu_usermode_queue *queue)
@@ -639,7 +687,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
 
        /* Resume all the queues for this process */
        idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
-               r = amdgpu_userq_map_helper(uq_mgr, queue);
+               r = amdgpu_userqueue_restore_helper(uq_mgr, queue);
                if (r)
                        ret = r;
        }
@@ -794,10 +842,9 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
        int queue_id;
        int ret = 0, r;
 
-       amdgpu_userq_detect_and_reset_queues(uq_mgr);
        /* Try to unmap all the queues in this process ctx */
        idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
-               r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+               r = amdgpu_userqueue_preempt_helper(uq_mgr, queue);
                if (r)
                        ret = r;
        }
-- 
2.49.0
