When KFD needs to be reset, sending commands to the HWS might cause a hang
and an unnecessary timeout.
This change tries not to touch the HW in pre_reset and keeps the queues in
the evicted state when the reset is done, so they are not put back on the
runlist. These queues will be destroyed on process termination.

Signed-off-by: shaoyunl <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c              | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e9601d4dfb77..0a60317509c8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1430,7 +1430,7 @@ static int unmap_queues_cpsch(struct device_queue_manager 
*dqm,
 
        if (!dqm->sched_running)
                return 0;
-       if (dqm->is_hws_hang)
+       if (dqm->is_hws_hang || dqm->is_resetting)
                return -EIO;
        if (!dqm->active_runlist)
                return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index f8a8fdb95832..f29b3932e3dc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1715,7 +1715,11 @@ int kfd_process_evict_queues(struct kfd_process *p)
 
                r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
                                                            &pdd->qpd);
-               if (r) {
+               /* evict returns -EIO if HWS is hung or the ASIC is resetting. In this
+                * case we still want to mark all queues as evicted so they are not
+                * added back, since their state has not actually been saved.
+                */
+               if (r && r != -EIO) {
                        pr_err("Failed to evict process queues\n");
                        goto fail;
                }
-- 
2.17.1

Reply via email to