Module: Mesa
Branch: main
Commit: 2efd1baa6446bed0426058eb0424ba40112c347c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2efd1baa6446bed0426058eb0424ba40112c347c

Author: Pierre-Eric Pelloux-Prayer <[email protected]>
Date:   Tue Oct  3 10:44:02 2023 +0200

radeonsi/sqtt: fix capturing RGP on RDNA3 with more than one Shader Engine

Based on radv 2cc981a0cd548ecce.

Reviewed-by: Marek Olšák <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26774>

---

 src/gallium/drivers/radeonsi/si_sqtt.c | 62 +++++++++++++---------------------
 1 file changed, 24 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c
index 1b4041d16a0..61c7216bf6f 100644
--- a/src/gallium/drivers/radeonsi/si_sqtt.c
+++ b/src/gallium/drivers/radeonsi/si_sqtt.c
@@ -47,18 +47,6 @@ static bool si_sqtt_init_bo(struct si_context *sctx)
    return true;
 }
 
-static bool
-si_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se)
-{
-   /* FIXME: SQTT only works on SE0 for some unknown reasons. See RADV for the
-    * solution */
-   if (info->gfx_level == GFX11)
-      return se != 0;
-
-   /* No active CU on the SE means it is disabled. */
-   return info->cu_mask[se][0] == 0;
-}
-
 static void si_emit_sqtt_start(struct si_context *sctx,
                                struct radeon_cmdbuf *cs,
                                uint32_t queue_family_index)
@@ -75,13 +63,13 @@ static void si_emit_sqtt_start(struct si_context *sctx,
          ac_sqtt_get_data_va(&sctx->screen->info, sctx->sqtt, va, se);
       uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
 
-      if (si_sqtt_se_is_disabled(&sctx->screen->info, se))
+      if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
          continue;
 
       /* Target SEx and SH0. */
-      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
-                             S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) |
-                             S_030800_INSTANCE_BROADCAST_WRITES(1));
+      radeon_set_uconfig_perfctr_reg_seq(R_030800_GRBM_GFX_INDEX, 1);
+      radeon_emit(S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) |
+                  S_030800_INSTANCE_BROADCAST_WRITES(1));
 
       /* Select the first active CUs */
       int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
@@ -99,21 +87,17 @@ static void si_emit_sqtt_start(struct si_context *sctx,
             /* Disable unsupported hw shader stages */
             shader_mask &= ~(0x02 /* VS */ | 0x08 /* ES */ | 0x20 /* LS */);
 
-            radeon_set_uconfig_reg(R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
-                                   S_0367A4_SIZE(shifted_size) |
-                                   S_0367A4_BASE_HI(shifted_va >> 32));
-
-            radeon_set_uconfig_reg(R_0367A0_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
-
-            radeon_set_uconfig_reg(R_0367B4_SQ_THREAD_TRACE_MASK,
-                                   S_0367B4_WTYPE_INCLUDE(shader_mask) |
-                                   S_0367B4_SA_SEL(0) | S_0367B4_WGP_SEL(wgp) |
-                                   S_0367B4_SIMD_SEL(0));
-
-            radeon_set_uconfig_reg(
-               R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK,
-               S_0367B8_REG_INCLUDE(token_mask) |
-               S_0367B8_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
+            radeon_set_uconfig_perfctr_reg_seq(R_0367A0_SQ_THREAD_TRACE_BUF0_BASE, 2);
+            radeon_emit(shifted_va);
+            radeon_emit(S_0367A4_SIZE(shifted_size) |
+                        S_0367A4_BASE_HI(shifted_va >> 32));
+
+            radeon_set_uconfig_perfctr_reg_seq(R_0367B4_SQ_THREAD_TRACE_MASK, 2);
+            radeon_emit(S_0367B4_WTYPE_INCLUDE(shader_mask) |
+                        S_0367B4_SA_SEL(0) | S_0367B4_WGP_SEL(wgp) |
+                        S_0367B4_SIMD_SEL(0));
+            radeon_emit(S_0367B8_REG_INCLUDE(token_mask) |
+                        S_0367B8_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
          } else {
             radeon_set_privileged_config_reg(
                R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
@@ -157,7 +141,8 @@ static void si_emit_sqtt_start(struct si_context *sctx,
                ctrl |= S_0367B0_SPI_STALL_EN(1) |
                        S_0367B0_SQ_STALL_EN(1) |
                        S_0367B0_REG_AT_HWM(2);
-               radeon_set_uconfig_reg(R_0367B0_SQ_THREAD_TRACE_CTRL, ctrl);
+               radeon_set_uconfig_perfctr_reg_seq(R_0367B0_SQ_THREAD_TRACE_CTRL, 1);
+               radeon_emit(ctrl);
                break;
             default:
                assert(false);
@@ -328,7 +313,6 @@ static void si_emit_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs,
                               uint32_t queue_family_index)
 {
    unsigned max_se = sctx->screen->info.max_se;
-
    radeon_begin(cs);
 
    /* Stop the thread trace with a different event based on the queue. */
@@ -353,7 +337,7 @@ static void si_emit_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs,
    }
 
    for (unsigned se = 0; se < max_se; se++) {
-      if (si_sqtt_se_is_disabled(&sctx->screen->info, se))
+      if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
          continue;
 
       radeon_begin(cs);
@@ -381,11 +365,13 @@ static void si_emit_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs,
          }
 
          /* Disable the thread trace mode. */
-         if (sctx->gfx_level >= GFX11)
-            radeon_set_uconfig_reg(R_0367B0_SQ_THREAD_TRACE_CTRL, S_008D1C_MODE(0));
-         else
+         if (sctx->gfx_level >= GFX11) {
+            radeon_set_uconfig_perfctr_reg_seq(R_0367B0_SQ_THREAD_TRACE_CTRL, 1);
+            radeon_emit(S_008D1C_MODE(0));
+         } else {
             radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
                                              S_008D1C_MODE(0));
+         }
 
          /* Wait for thread trace completion. */
          radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
@@ -590,7 +576,7 @@ static bool si_get_sqtt_trace(struct si_context *sctx,
          void *info_ptr = sqtt_ptr + info_offset;
          struct ac_sqtt_data_info *info = (struct ac_sqtt_data_info *)info_ptr;
 
-         if (si_sqtt_se_is_disabled(&sctx->screen->info, se))
+         if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
             continue;
 
          if (!ac_is_sqtt_complete(&sctx->screen->info, sctx->sqtt, info)) {

Reply via email to