On 11/18/2021 12:32 PM, Prike Liang wrote:
Do ASIC reset at the moment Sx suspend aborted behind of amdgpu suspend
to keep AMDGPU in a clean reset state and that can avoid re-initialize
device improperly error.

Signed-off-by: Prike Liang <[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 ++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 19 +++++++++++++++++++
  3 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b85b67a..8bd9833 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1053,6 +1053,7 @@ struct amdgpu_device {
        bool                            in_s3;
        bool                            in_s4;
        bool                            in_s0ix;
+       bool                            pm_completed;

PM framework maintains separate flags, why not use the same?

        dev->power.is_suspended = false;
        dev->power.is_noirq_suspended = false;
        dev->power.is_late_suspended = false;

BTW, Alex posted a patch which does similar thing, though it tries reset if suspend fails.

https://lore.kernel.org/all/dm6pr12mb26195f8e099407b4b6966febe4...@dm6pr12mb2619.namprd12.prod.outlook.com/

If that reset also failed, then no point in another reset, or keep it as part of resume.

Thanks,
Lijo

atomic_t in_gpu_reset;
        enum pp_mp1_state               mp1_state;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ec42a6f..a12ed54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3983,6 +3983,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
        if (adev->in_s0ix)
                amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
+ if (!adev->pm_completed) {
+               dev_warn(adev->dev, "suspend aborted will do asic reset\n");
+               amdgpu_asic_reset(adev);
+       }
        /* post card */
        if (amdgpu_device_need_post(adev)) {
                r = amdgpu_device_asic_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index eee3cf8..9f90017 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2168,6 +2168,23 @@ static int amdgpu_pmops_suspend(struct device *dev)
        return r;
  }
+/*
+ * Actually the PM suspend whether is completed should be confirmed
+ * by checking the sysfs sys/power/suspend_stats/failed_suspend.However,
+ * in this function only check the AMDGPU device whether is suspended
+ * completely in the system-wide suspend process.
+ */
+static int amdgpu_pmops_noirq_suspend(struct device *dev)
+{
+       struct drm_device *drm_dev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+       dev_dbg(dev, "amdgpu suspend completely.\n");
+       adev->pm_completed = true;
+
+       return 0;
+}
+
  static int amdgpu_pmops_resume(struct device *dev)
  {
        struct drm_device *drm_dev = dev_get_drvdata(dev);
@@ -2181,6 +2198,7 @@ static int amdgpu_pmops_resume(struct device *dev)
        r = amdgpu_device_resume(drm_dev, true);
        if (amdgpu_acpi_is_s0ix_active(adev))
                adev->in_s0ix = false;
+       adev->pm_completed = false;
        return r;
  }
@@ -2397,6 +2415,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
        .runtime_suspend = amdgpu_pmops_runtime_suspend,
        .runtime_resume = amdgpu_pmops_runtime_resume,
        .runtime_idle = amdgpu_pmops_runtime_idle,
+       .suspend_noirq = amdgpu_pmops_noirq_suspend,
  };
static int amdgpu_flush(struct file *f, fl_owner_t id)

Reply via email to