[AMD Official Use Only - AMD Internal Distribution Only]
>-----Original Message-----
>From: amd-gfx <[email protected]> On Behalf Of Victor Zhao
>Sent: Friday, October 10, 2025 1:03 PM
>To: [email protected]
>Cc: Chang, HaiJun <[email protected]>; Zhao, Victor
><[email protected]>
>Subject: [PATCH v2 2/2] drm/amdgpu: use GPU_HDP_FLUSH for sriov
>
>Currently the SRIOV runtime uses KIQ to write HDP_MEM_FLUSH_CNTL for the
>HDP flush. This register needs to be written from the CPU for the NBIF to
>take notice, otherwise the flush will not work.
>
>Implement amdgpu_kiq_hdp_flush and use kiq to do gpu hdp flush during sriov
>runtime.
>
>v2:
>- fallback to amdgpu_asic_flush_hdp when amdgpu_kiq_hdp_flush failed
>- add function amdgpu_mes_hdp_flush
>
>Signed-off-by: Victor Zhao <[email protected]>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 71 ++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 12 ++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 +
> 5 files changed, 95 insertions(+), 3 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index 7a899fb4de29..65cc6f776536 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -7279,10 +7279,17 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
> if (adev->gmc.xgmi.connected_to_cpu)
> return;
>
>- if (ring && ring->funcs->emit_hdp_flush)
>+ if (ring && ring->funcs->emit_hdp_flush) {
> amdgpu_ring_emit_hdp_flush(ring);
>- else
>- amdgpu_asic_flush_hdp(adev, ring);
>+ return;
>+ }
>+
>+ if (!ring && amdgpu_sriov_runtime(adev)) {
>+ if (!amdgpu_kiq_hdp_flush(adev))
>+ return;
>+ }
>+
>+ amdgpu_asic_flush_hdp(adev, ring);
> }
>
> void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>index 7f02e36ccc1e..bf28e8ef6c14 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>@@ -33,6 +33,7 @@
> #include "amdgpu_reset.h"
> #include "amdgpu_xcp.h"
> #include "amdgpu_xgmi.h"
>+#include "amdgpu_mes.h"
> #include "nvd.h"
>
> /* delay 0.1 second to enable gfx off feature */
>@@ -1194,6 +1195,75 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3
> dev_err(adev->dev, "failed to write reg:%x\n", reg);
> }
>
>+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev)
>+{
>+ signed long r, cnt = 0;
>+ unsigned long flags;
>+ uint32_t seq;
>+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
>+ struct amdgpu_ring *ring = &kiq->ring;
>+
>+ if (amdgpu_device_skip_hw_access(adev))
>+ return 0;
>+
>+ if (adev->enable_mes_kiq && adev->mes.ring[0].sched.ready)
>+ return amdgpu_mes_hdp_flush(adev);
>+
>+ if (!ring->funcs->emit_hdp_flush) {
>+ return -ENODEV;
[lijo]
Maybe EOPNOTSUPP - i.e., this ring doesn't support this operation?
Apart from that, the series look good.
Thanks,
Lijo
>+ }
>+
>+ spin_lock_irqsave(&kiq->ring_lock, flags);
>+ r = amdgpu_ring_alloc(ring, 32);
>+ if (r)
>+ goto failed_unlock;
>+
>+ amdgpu_ring_emit_hdp_flush(ring);
>+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
>+ if (r)
>+ goto failed_undo;
>+
>+ amdgpu_ring_commit(ring);
>+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
>+
>+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>+
>+ /* don't wait anymore for gpu reset case because this way may
>+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
>+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
>+ * never return if we keep waiting in virt_kiq_rreg, which cause
>+ * gpu_recover() hang there.
>+ *
>+ * also don't wait anymore for IRQ context
>+ * */
>+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
>+ goto failed_kiq_hdp_flush;
>+
>+ might_sleep();
>+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
>+ if (amdgpu_in_reset(adev))
>+ goto failed_kiq_hdp_flush;
>+
>+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
>+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>+ }
>+
>+ if (cnt > MAX_KIQ_REG_TRY) {
>+ dev_err(adev->dev, "failed to flush HDP via KIQ timeout\n");
>+ return -ETIMEDOUT;
>+ }
>+
>+ return 0;
>+
>+failed_undo:
>+ amdgpu_ring_undo(ring);
>+failed_unlock:
>+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
>+failed_kiq_hdp_flush:
>+ dev_err(adev->dev, "failed to flush HDP via KIQ\n");
>+ return r < 0 ? r : -EIO;
>+}
>+
> int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
> {
> if (amdgpu_num_kcq == -1) {
>@@ -2484,3 +2554,4 @@ void
>amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
> &amdgpu_debugfs_compute_sched_mask_fops);
> #endif
> }
>+
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>index fb5f7a0ee029..efd61a1ccc66 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>@@ -615,6 +615,7 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device
>*adev,
> struct amdgpu_iv_entry *entry);
> uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
> void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
>+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev);
> int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
> void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>index 8d03e8c9cc6d..be62681b0c3a 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>@@ -523,6 +523,18 @@ int amdgpu_mes_reg_write_reg_wait(struct
>amdgpu_device *adev,
> return r;
> }
>
>+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev)
>+{
>+ uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask;
>+
>+ hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
>+ hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev);
>+ ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0;
>+
>+ return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset,
>+ ref_and_mask, ref_and_mask);
>+}
>+
> int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
> uint64_t process_context_addr,
> uint32_t spi_gdbg_per_vmid_cntl,
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>index 6b506fc72f58..3a51ace2fa14 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>@@ -427,6 +427,7 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev,
> int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
> uint32_t reg0, uint32_t reg1,
> uint32_t ref, uint32_t mask);
>+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev);
> int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
> uint64_t process_context_addr,
> uint32_t spi_gdbg_per_vmid_cntl,
>--
>2.25.1