1,add new parm @job to indicate which ring hang.
2,return when calling to amdgpu_gpu_reset if under SRIOV case

with this patch, we'll reset & recovery on the perticular ring
instead of overkill all ring.

Change-Id: I37dff5c16ef161e3a8425434516ebb285fbe6600
Signed-off-by: Monk Liu <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 ++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 6 +++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c      | 2 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c      | 2 +-
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0c51fb5..157d023 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2528,6 +2528,7 @@ static int amdgpu_recover_vram_from_shadow(struct 
amdgpu_device *adev,
  * amdgpu_sriov_gpu_reset - reset the asic
  *
  * @adev: amdgpu device pointer
+ * @job: the job lead to hang
  * @voluntary: if this reset is requested by guest.
  *             (true means by guest and false means by HYPERVISOR )
  *
@@ -2535,9 +2536,9 @@ static int amdgpu_recover_vram_from_shadow(struct 
amdgpu_device *adev,
  * for SRIOV case.
  * Returns 0 for success or an error on failure.
  */
-int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
+int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job, 
bool voluntary)
 {
-       int i, r = 0;
+       int i, j, r = 0;
        int resched;
        struct amdgpu_bo *bo, *tmp;
        struct amdgpu_ring *ring;
@@ -2652,7 +2653,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
        bool need_full_reset;
 
        if (amdgpu_sriov_vf(adev))
-               return amdgpu_sriov_gpu_reset(adev, true);
+               return -EINVAL;
 
        if (!amdgpu_check_soft_reset(adev)) {
                DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 0209c96..2cc7c63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -36,7 +36,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
                  job->base.sched->name,
                  atomic_read(&job->ring->fence_drv.last_seq),
                  job->ring->fence_drv.sync_seq);
-       amdgpu_gpu_reset(job->adev);
+
+       if (amdgpu_sriov_vf(job->adev))
+               amdgpu_sriov_gpu_reset(job->adev, job, true);
+       else
+               amdgpu_gpu_reset(job->adev);
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index a8ed162..2b641eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -97,7 +97,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, 
uint32_t reg, uint32_t v);
 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
-int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary);
+int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job, 
bool voluntary);
 int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
 void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index e967a7b..4d71db3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -243,7 +243,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
        }
 
        /* Trigger recovery due to world switch failure */
-       amdgpu_sriov_gpu_reset(adev, false);
+       amdgpu_sriov_gpu_reset(adev, NULL, false);
 }
 
 static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index f0d64f1..a604bd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct 
*work)
        }
 
        /* Trigger recovery due to world switch failure */
-       amdgpu_sriov_gpu_reset(adev, false);
+       amdgpu_sriov_gpu_reset(adev, NULL, false);
 }
 
 static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to