On Wed, Mar 26, 2025 at 1:22 AM Kenneth Feng <[email protected]> wrote:
>
> Port the workload profile setting logic into dm before MALL optimization.
>
> Background:
> The MALL optimization strategy has changed in the firmware. Previously,
> the firmware did not care what the workload type was: once there was a
> request from DAL for MALL, the firmware immediately triggered the MALL
> setting sequence on the SoC, the so-called D0i3.x idle power sequence.
> Now, before the D0i3.x sequence starts, the firmware always checks
> whether the workload type is default; if it is not, the D0i3.x
> sequence is aborted.
>
> Issue:
> Due to this strategy change, the task is moved to the driver: check
> whether gfx is really idle and, if it is, reset the workload type to
> default. Without this, when DAL's work task for MALL optimization
> sends the optimization request to DMCUB->pmfw, the workload type is
> always 3D fullscreen or compute, so MALL will never be applied.
>
> Why:
> The interval of the idle task that sets the workload type back to
> default is currently 1 second. The DAL work task that optimizes MALL
> always starts before that idle task runs. There is no way to ask the
> idle task in the base driver to reset the workload type before DAL's
> MALL setting work task kicks off. A possible workaround is to set the
> idle task interval to 10 milliseconds; however, this causes call-trace
> issues when the workqueues are flushed.

That should already be fixed by this commit:
https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=de35994ecd2dd6148ab5a6c5050a1670a04dec77

>
> Side effect:
> This solution ports the logic from the idle thread into DAL: check the
> fences to make sure gfx is idle, then reset the workload type. It is
> fine that DAL's work task does not set the workload type back to 3D
> fullscreen or compute when it exits the MALL optimization, since the
> work task in the base driver makes sure the workload type is set back
> once there are jobs in the ring.

I guess this works because the workload ref count gets clamped to 0,
but this call is not balanced.  It also doesn't handle the VIDEO
profile that gets set when using VCN or the COMPUTE profile when KFD
queues are active.  Those would also prevent the idle optimizations.
Also what happens if the profile changes after DC optimizations are
enabled?  Does that cause the optimizations to exit or will they stay
intact until DC tells it to exit?

So I think we have two options:
1. Always disable the 3D, compute, and video profiles when entering the
DAL optimization. Subsequently, additional job submissions may change
the workload. Will that be a problem?
2. Add a helper to pause workload profiles while the DC optimization
is active.  If the profile only has to be changed while enabling the
DC optimization, we can just call it right before and right after the
dc optimizations.  Something like the attached patches should be a
good starting point.

Alex

>
> Signed-off-by: Kenneth Feng <[email protected]>
> ---
>  .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c    | 29 ++++++++++++++++++-
>  1 file changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
> index 36a830a7440f..2adb3b72ed05 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
> @@ -244,6 +244,20 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct 
> work_struct *work)
>         struct vblank_control_work *vblank_work =
>                 container_of(work, struct vblank_control_work, work);
>         struct amdgpu_display_manager *dm = vblank_work->dm;
> +       u32 i, fences = 0;
> +       int r;
> +       enum PP_SMC_POWER_PROFILE profile;
> +       struct amdgpu_device *adev = drm_to_adev(dm->ddev);
> +
> +       if (adev->gfx.num_gfx_rings)
> +               profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
> +       else
> +               profile = PP_SMC_POWER_PROFILE_COMPUTE;
> +
> +       for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i)
> +               fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
> +       for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); 
> ++i)
> +               fences += 
> amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
>
>         mutex_lock(&dm->dc_lock);
>
> @@ -271,8 +285,21 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct 
> work_struct *work)
>                         vblank_work->acrtc->dm_irq_params.allow_sr_entry);
>         }
>
> -       if (dm->active_vblank_irq_count == 0)
> +       if (dm->active_vblank_irq_count == 0) {
> +               if (adev->gfx.num_gfx_rings && !fences && 
> !atomic_read(&adev->gfx.total_submission_cnt)) {
> +                       mutex_lock(&adev->gfx.workload_profile_mutex);
> +                       if (adev->gfx.workload_profile_active) {
> +                               r = amdgpu_dpm_switch_power_profile(adev, 
> profile, false);
> +                               if (r)
> +                               dev_warn(adev->dev, "(%d) failed to disable 
> %s power profile mode\n", r,
> +                                                                       
> profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
> +                                                                       
> "fullscreen 3D" : "compute");
> +                               adev->gfx.workload_profile_active = false;
> +                       }
> +                       mutex_unlock(&adev->gfx.workload_profile_mutex);
> +               }
>                 dc_allow_idle_optimizations(dm->dc, true);
> +       }
>
>         mutex_unlock(&dm->dc_lock);
>
> --
> 2.34.1
>
From cf10811fb8ae40439484609e5808fe6e49969f29 Mon Sep 17 00:00:00 2001
From: Alex Deucher <[email protected]>
Date: Wed, 26 Mar 2025 10:26:25 -0400
Subject: [PATCH 1/2] drm/amdgpu/pm: add workload profile pause helper

To be used for display idle optimizations when
we want to pause non-default profiles.

Signed-off-by: Alex Deucher <[email protected]>
---
 .../gpu/drm/amd/include/kgd_pp_interface.h    |  1 +
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 19 +++++++++++++++++++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       |  2 ++
 3 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 9189dcb651881..4424a44a2deda 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -414,6 +414,7 @@ struct amd_pm_funcs {
 	int (*set_pp_table)(void *handle, const char *buf, size_t size);
 	void (*debugfs_print_current_performance_level)(void *handle, struct seq_file *m);
 	int (*switch_power_profile)(void *handle, enum PP_SMC_POWER_PROFILE type, bool en);
+	int (*pause_power_profile)(void *handle, bool pause);
 /* export to amdgpu */
 	struct amd_vce_state *(*get_vce_clock_state)(void *handle, u32 idx);
 	int (*dispatch_tasks)(void *handle, enum amd_pp_task task_id,
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 0bcb82b2f3ae0..0e4d243641c8f 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -381,6 +381,25 @@ int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
 	return ret;
 }
 
+int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev,
+				   bool pause)
+{
+	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	int ret = 0;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	if (pp_funcs && pp_funcs->pause_power_profile) {
+		mutex_lock(&adev->pm.mutex);
+		ret = pp_funcs->pause_power_profile(
+			adev->powerplay.pp_handle, pause);
+		mutex_unlock(&adev->pm.mutex);
+	}
+
+	return ret;
+}
+
 int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
 			       uint32_t pstate)
 {
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 72565fab60673..df8850a01ea6b 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -410,6 +410,8 @@ int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
 int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
 				    enum PP_SMC_POWER_PROFILE type,
 				    bool en);
+int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev,
+				   bool pause);
 
 int amdgpu_dpm_baco_reset(struct amdgpu_device *adev);
 
-- 
2.49.0

From ec5d6e2037fdb34c5dde47557eb29c35017527d1 Mon Sep 17 00:00:00 2001
From: Alex Deucher <[email protected]>
Date: Wed, 26 Mar 2025 10:54:56 -0400
Subject: [PATCH 2/2] drm/amdgpu/pm/swsmu: implement pause workload profile

Add the callback for implementation for swsmu.

Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     | 36 ++++++++++++++++++-
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  1 +
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index a01b6244d99cd..d23cfd5ba8dc7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2398,7 +2398,11 @@ static int smu_switch_power_profile(void *handle,
 			smu_power_profile_mode_get(smu, type);
 		else
 			smu_power_profile_mode_put(smu, type);
-		ret = smu_bump_power_profile_mode(smu, NULL, 0);
+		/* don't switch the active workload when paused */
+		if (smu->pause_workload)
+			ret = 0;
+		else
+			ret = smu_bump_power_profile_mode(smu, NULL, 0);
 		if (ret) {
 			if (enable)
 				smu_power_profile_mode_put(smu, type);
@@ -2411,6 +2415,35 @@ static int smu_switch_power_profile(void *handle,
 	return 0;
 }
 
+static int smu_pause_power_profile(void *handle,
+				   bool pause)
+{
+	struct smu_context *smu = handle;
+	struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+	u32 workload_mask = 1 << PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+	int ret;
+
+	if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+		return -EOPNOTSUPP;
+
+	if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
+	    smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
+		smu->pause_workload = pause;
+
+		/* force to bootup default profile */
+		if (smu->pause_workload && smu->ppt_funcs->set_power_profile_mode)
+			ret = smu->ppt_funcs->set_power_profile_mode(smu,
+								     workload_mask,
+								     NULL,
+								     0);
+		else
+			ret = smu_bump_power_profile_mode(smu, NULL, 0);
+		return ret;
+	}
+
+	return 0;
+}
+
 static enum amd_dpm_forced_level smu_get_performance_level(void *handle)
 {
 	struct smu_context *smu = handle;
@@ -3759,6 +3792,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
 	.get_pp_table            = smu_sys_get_pp_table,
 	.set_pp_table            = smu_sys_set_pp_table,
 	.switch_power_profile    = smu_switch_power_profile,
+	.pause_power_profile     = smu_pause_power_profile,
 	/* export to amdgpu */
 	.dispatch_tasks          = smu_handle_dpm_task,
 	.load_firmware           = smu_load_microcode,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 27cecf9688ccb..a855ae249c739 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -560,6 +560,7 @@ struct smu_context {
 
 	/* asic agnostic workload mask */
 	uint32_t workload_mask;
+	bool pause_workload;
 	/* default/user workload preference */
 	uint32_t power_profile_mode;
 	uint32_t workload_refcount[PP_SMC_POWER_PROFILE_COUNT];
-- 
2.49.0

Reply via email to