changes:
1) implement strict mode SR-IOV GPU reset
2) always call sriov_gpu_reset_strict if the hypervisor notifies us of an FLR
3) in strict reset mode, set an error on all fences
4) change the fence_wait/cs_wait functions to return -ENODEV if the fence
was signaled with error == -ETIME

After a strict GPU reset we consider VRAM to be lost. Assuming VRAM is
lost, there is little point in recovering the shadow BOs, because all
textures/resources/shaders cannot be recovered anyway (if they were
resident in VRAM).
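
The reset level can be selected at module load time through the new
sriov_reset_level parameter (0 = loose, 1 = strict, other = disabled),
e.g.:

    modprobe amdgpu sriov_reset_level=1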

Change-Id: I50d9b8b5185ba92f137f07c9deeac19d740d753b
Signed-off-by: Monk Liu <[email protected]>
---
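
Note for reviewers: with the strict reset in place, user space sees
-ENODEV from the CS wait ioctls once a fence carries the -ETIME error,
and is expected to re-open the device FD and rebuild everything. Below
is a minimal, illustrative sketch of that recovery path written against
the existing amdgpu uAPI (union drm_amdgpu_wait_cs and
DRM_IOCTL_AMDGPU_WAIT_CS); the render node path, the handle/context
values and the rebuild step are placeholders, not something this patch
adds:

	#include <errno.h>
	#include <fcntl.h>
	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <amdgpu_drm.h>	/* from libdrm's include path */

	/* Wait on a submitted CS. Returns 0 when the fence signaled
	 * cleanly, -ETIME when still busy at abs_timeout_ns (the uAPI
	 * takes an absolute timeout), or a negative errno such as the
	 * -ENODEV introduced by this patch. EINTR handling is omitted.
	 */
	static int wait_cs(int fd, uint64_t handle, uint32_t ctx_id,
			   uint64_t abs_timeout_ns)
	{
		union drm_amdgpu_wait_cs args;

		memset(&args, 0, sizeof(args));
		args.in.handle = handle;
		args.in.timeout = abs_timeout_ns;
		args.in.ip_type = AMDGPU_HW_IP_GFX;
		args.in.ctx_id = ctx_id;

		if (ioctl(fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args))
			return -errno;	/* -ENODEV after a strict reset */

		return args.out.status ? -ETIME : 0;
	}

	/* Caller side: on -ENODEV drop the old FD, re-open the node and
	 * re-create the context and every BO/texture/shader, as the
	 * commit message requires.
	 */
	static int wait_or_recover(int *fd, uint64_t handle, uint32_t ctx_id,
				   uint64_t abs_timeout_ns)
	{
		int r = wait_cs(*fd, handle, ctx_id, abs_timeout_ns);

		if (r == -ENODEV) {
			close(*fd);
			*fd = open("/dev/dri/renderD128", O_RDWR); /* example path */
			/* re-create contexts, BOs, textures, shaders, ... */
		}
		return r;
	}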
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 25 ++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 91 +++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |  1 +
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c         |  4 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c         |  4 +-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 60 ++++++++++++++++++
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  2 +
 10 files changed, 189 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index de11527..de9c164 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -123,6 +123,7 @@ extern int amdgpu_cntl_sb_buf_per_se;
 extern int amdgpu_param_buf_per_se;
 extern int amdgpu_job_hang_limit;
 extern int amdgpu_lbpw;
+extern int amdgpu_sriov_reset_level;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c6a214f..9467cf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1262,6 +1262,7 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
        struct amdgpu_ctx *ctx;
        struct dma_fence *fence;
        long r;
+       int fence_err = 0;
 
        if (amdgpu_kms_vram_lost(adev, fpriv))
                return -ENODEV;
@@ -1283,6 +1284,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
                r = PTR_ERR(fence);
        else if (fence) {
                r = dma_fence_wait_timeout(fence, true, timeout);
+               /* fence_err < 0 means the GPU hung and this fence was signaled by the GPU reset */
+               fence_err = dma_fence_get_status(fence);
                dma_fence_put(fence);
        } else
                r = 1;
@@ -1292,7 +1295,10 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
                return r;
 
        memset(wait, 0, sizeof(*wait));
-       wait->out.status = (r == 0);
+       wait->out.status = (r == 0 || fence_err < 0);
+
+       if (fence_err < 0)
+               return -ENODEV;
 
        return 0;
 }
@@ -1346,6 +1352,7 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
        uint32_t fence_count = wait->in.fence_count;
        unsigned int i;
        long r = 1;
+       int fence_err = 0;
 
        for (i = 0; i < fence_count; i++) {
                struct dma_fence *fence;
@@ -1358,16 +1365,20 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
                        continue;
 
                r = dma_fence_wait_timeout(fence, true, timeout);
+               fence_err = dma_fence_get_status(fence);
                dma_fence_put(fence);
                if (r < 0)
                        return r;
 
-               if (r == 0)
+               if (r == 0 || fence_err < 0)
                        break;
        }
 
        memset(wait, 0, sizeof(*wait));
-       wait->out.status = (r > 0);
+       wait->out.status = (r > 0 && fence_err == 0);
+
+       if (fence_err < 0)
+               return -ENODEV;
 
        return 0;
 }
@@ -1391,6 +1402,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
        struct dma_fence **array;
        unsigned int i;
        long r;
+       int fence_err = 0;
 
        /* Prepare the fence array */
        array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
@@ -1418,10 +1430,12 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
                                       &first);
        if (r < 0)
                goto err_free_fence_array;
+       else if (r > 0)
+               fence_err = dma_fence_get_status(array[first]);
 
 out:
        memset(wait, 0, sizeof(*wait));
-       wait->out.status = (r > 0);
+       wait->out.status = (r > 0 && fence_err == 0);
        wait->out.first_signaled = first;
        /* set return value 0 to indicate success */
        r = 0;
@@ -1431,6 +1445,9 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
                dma_fence_put(array[i]);
        kfree(array);
 
+       if (fence_err < 0)
+               return -ENODEV;
+
        return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9efbb33..122e2e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2734,6 +2734,97 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
 }
 
 /**
+ * amdgpu_sriov_gpu_reset_strict - reset the asic under strict mode
+ *
+ * @adev: amdgpu device pointer
+ * @job: the job that triggered the hang
+ *
+ * Attempt to reset the GPU if it has hung (all ASICs),
+ * for the SR-IOV case.
+ * Returns 0 for success or an error on failure.
+ *
+ * This function denies all processes/fences created before this reset,
+ * and drops all jobs left unfinished by this reset.
+ *
+ * Applications are responsible for re-opening the device FD to re-create
+ * the VM page tables and to recover all their resources.
+ *
+ **/
+int amdgpu_sriov_gpu_reset_strict(struct amdgpu_device *adev, struct amdgpu_job *job)
+{
+       int i, r = 0;
+       int resched;
+       struct amdgpu_ring *ring;
+
+       /* another thread already started the GPU reset, so bail out and retry later */
+       if (!atomic_add_unless(&adev->in_sriov_reset, 1, 1))
+               return -EAGAIN;
+
+       atomic_inc(&adev->gpu_reset_counter);
+
+       /* block TTM */
+       resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+
+       /* fake-signal the jobs already scheduled */
+       for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+               ring = adev->rings[i];
+
+               if (!ring || !ring->sched.thread)
+                       continue;
+
+               kthread_park(ring->sched.thread);
+               amd_sched_set_sched_hang(&ring->sched);
+               amdgpu_fence_driver_force_completion_ring(ring);
+               amd_sched_set_queue_hang(&ring->sched);
+       }
+
+       /* request full control of the GPU before re-initialization */
+       if (job)
+               amdgpu_virt_reset_gpu(adev);
+       else
+               amdgpu_virt_request_full_gpu(adev, true);
+
+       /* Resume IP prior to SMC */
+       amdgpu_sriov_reinit_early(adev);
+
+       /* we need to recover the GART before resuming SMC/CP/SDMA */
+       amdgpu_ttm_recover_gart(adev);
+
+       /* now we are okay to resume SMC/CP/SDMA */
+       amdgpu_sriov_reinit_late(adev);
+
+       /* resume IRQ status */
+       amdgpu_irq_gpu_reset_resume_helper(adev);
+
+       r = amdgpu_ib_ring_tests(adev);
+       if (r)
+               dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r);
+
+       /* release full control of GPU after ib test */
+       amdgpu_virt_release_full_gpu(adev, true);
+
+       for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+               ring = adev->rings[i];
+
+               if (!ring || !ring->sched.thread)
+                       continue;
+
+               kthread_unpark(ring->sched.thread);
+       }
+
+       drm_helper_resume_force_mode(adev->ddev);
+
+       ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
+       if (r)
+               dev_info(adev->dev, "Strict mode GPU reset failed\n");
+       else
+               dev_info(adev->dev, "Strict mode GPU reset succeeded!\n");
+
+       atomic_set(&adev->in_sriov_reset, 0);
+       return r;
+}
+
+/**
  * amdgpu_sriov_gpu_reset - reset the asic
  *
  * @adev: amdgpu device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8f5211c..eee67dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -123,6 +123,7 @@ int amdgpu_cntl_sb_buf_per_se = 0;
 int amdgpu_param_buf_per_se = 0;
 int amdgpu_job_hang_limit = 0;
 int amdgpu_lbpw = -1;
+int amdgpu_sriov_reset_level = 0;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -269,6 +270,9 @@ module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
 MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(lbpw, amdgpu_lbpw, int, 0444);
 
+MODULE_PARM_DESC(sriov_reset_level, "SRIOV GPU reset level (0 = loose, 1 = strict, other = disabled, default 0)");
+module_param_named(sriov_reset_level, amdgpu_sriov_reset_level, int, 0444);
+
 #ifdef CONFIG_DRM_AMDGPU_SI
 
 #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 0db81a4..933823a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -41,7 +41,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
                int r;
 
 try_again:
-               r = amdgpu_sriov_gpu_reset(job->adev, job);
+               if (amdgpu_sriov_reset_level == 1)
+                       r = amdgpu_sriov_gpu_reset_strict(job->adev, job);
+               else
+                       r = amdgpu_sriov_gpu_reset(job->adev, job);
+
                if (r == -EAGAIN) {
                        /* maye two different schedulers all have hang job, try later */
                        schedule();
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index a3cbd5a..5664a10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -100,5 +100,6 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
 int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job);
 int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
 void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
+int amdgpu_sriov_gpu_reset_strict(struct amdgpu_device *adev, struct amdgpu_job *job);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 2812d88..00a9629 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -247,8 +247,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
                return;
        }
 
-       /* Trigger recovery due to world switch failure */
-       amdgpu_sriov_gpu_reset(adev, NULL);
+       /* use strict mode when the FLR is triggered by the hypervisor */
+       amdgpu_sriov_gpu_reset_strict(adev, NULL);
 }
 
 static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index c25a831..c94b6e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -513,8 +513,8 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
                return;
        }
 
-       /* Trigger recovery due to world switch failure */
-       amdgpu_sriov_gpu_reset(adev, NULL);
+       /* use strict mode when the FLR is triggered by the hypervisor */
+       amdgpu_sriov_gpu_reset_strict(adev, NULL);
 }
 
 static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 97c94f9..12c3092 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -430,6 +430,66 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
        spin_unlock(&sched->job_list_lock);
 }
 
+/**
+ * amd_sched_set_sched_hang
+ * @sched: the scheduler whose pending jobs are to be marked as hung
+ *
+ * This routine sets all unfinished jobs pending in the scheduler to
+ * an error status of -ETIME.
+ *
+ **/
+void amd_sched_set_sched_hang(struct amd_gpu_scheduler *sched)
+{
+       struct amd_sched_job *s_job;
+
+       spin_lock(&sched->job_list_lock);
+       list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node)
+               dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
+
+       spin_unlock(&sched->job_list_lock);
+}
+
+/**
+ * amd_sched_set_queue_hang
+ * @sched: the scheduler whose queued jobs are to be marked as hung
+ *
+ * This routine sets all jobs in the KFIFO of @sched to an error
+ * status of -ETIME and fake-signals those jobs.
+ *
+ **/
+
+void amd_sched_set_queue_hang(struct amd_gpu_scheduler *sched)
+{
+       struct amd_sched_entity *entity, *tmp;
+       struct amd_sched_job *s_job;
+       struct amd_sched_rq *rq;
+       int i;
+
+       /* set HANG status on all jobs queued and fake signal them */
+       for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_MAX; i++) {
+               rq = &sched->sched_rq[i];
+
+               spin_lock(&rq->lock);
+               list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
+                       if (entity->dependency) {
+                               dma_fence_remove_callback(entity->dependency, 
&entity->cb);
+                               dma_fence_put(entity->dependency);
+                               entity->dependency = NULL;
+                       }
+
+                       spin_lock(&entity->queue_lock);
+                       while (kfifo_out(&entity->job_queue, &s_job, sizeof(s_job)) == sizeof(s_job)) {
+                               dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
+                               amd_sched_fence_scheduled(s_job->s_fence);
+                               amd_sched_fence_finished(s_job->s_fence);
+                       }
+                       spin_unlock(&entity->queue_lock);
+               }
+               spin_unlock(&rq->lock);
+       }
+       wake_up(&sched->job_scheduled);
+}
+
 void amd_sched_job_kickout(struct amd_sched_job *s_job)
 {
        struct amd_gpu_scheduler *sched = s_job->sched;
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index f9d8f28..f0242aa 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -167,4 +167,6 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
 bool amd_sched_dependency_optimized(struct dma_fence* fence,
                                    struct amd_sched_entity *entity);
 void amd_sched_job_kickout(struct amd_sched_job *s_job);
+void amd_sched_set_queue_hang(struct amd_gpu_scheduler *sched);
+void amd_sched_set_sched_hang(struct amd_gpu_scheduler *sched);
 #endif
-- 
2.7.4
