Module: Mesa Branch: main Commit: 5371fca829d16e778e9c29a0708cd3185997f9ff URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5371fca829d16e778e9c29a0708cd3185997f9ff
Author: Pierre-Eric Pelloux-Prayer <[email protected]> Date: Tue Oct 3 11:06:52 2023 +0200 radeonsi/sqtt: handle COMPUTE queues as well Use cs_get_ip_type to support both types of queues instead of restricting ourselves to GFX. Reviewed-by: Marek Olšák <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26774> --- src/gallium/drivers/radeonsi/si_sqtt.c | 83 +++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c index 61c7216bf6f..48b5b981474 100644 --- a/src/gallium/drivers/radeonsi/si_sqtt.c +++ b/src/gallium/drivers/radeonsi/si_sqtt.c @@ -4,6 +4,7 @@ * SPDX-License-Identifier: MIT */ +#include "amd_family.h" #include "si_build_pm4.h" #include "si_pipe.h" @@ -49,7 +50,7 @@ static bool si_sqtt_init_bo(struct si_context *sctx) static void si_emit_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs, - uint32_t queue_family_index) + enum amd_ip_type ip_type) { struct si_screen *sscreen = sctx->screen; uint32_t shifted_size = sctx->sqtt->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT; @@ -216,7 +217,7 @@ static void si_emit_sqtt_start(struct si_context *sctx, S_030800_INSTANCE_BROADCAST_WRITES(1)); /* Start the thread trace with a different event based on the queue. */ - if (queue_family_index == AMD_IP_COMPUTE) { + if (ip_type == AMD_IP_COMPUTE) { radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE, S_00B878_THREAD_TRACE_ENABLE(1)); } else { @@ -310,13 +311,13 @@ static void si_copy_sqtt_info_regs(struct si_context *sctx, } static void si_emit_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs, - uint32_t queue_family_index) + enum amd_ip_type ip_type) { unsigned max_se = sctx->screen->info.max_se; radeon_begin(cs); /* Stop the thread trace with a different event based on the queue. 
*/ - if (queue_family_index == AMD_IP_COMPUTE) { + if (ip_type == AMD_IP_COMPUTE) { radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE, S_00B878_THREAD_TRACE_ENABLE(0)); } else { @@ -411,14 +412,14 @@ static void si_emit_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs, radeon_end(); } -static void si_sqtt_start(struct si_context *sctx, int family, - struct radeon_cmdbuf *cs) +static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs) { struct radeon_winsys *ws = sctx->ws; + enum amd_ip_type ip_type = sctx->ws->cs_get_ip_type(cs); radeon_begin(cs); - switch (family) { + switch (ip_type) { case AMD_IP_GFX: radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(CC0_UPDATE_LOAD_ENABLES(1)); @@ -428,6 +429,9 @@ static void si_sqtt_start(struct si_context *sctx, int family, radeon_emit(PKT3(PKT3_NOP, 0, 0)); radeon_emit(0); break; + default: + /* Unsupported. */ + assert(false); } radeon_end(); @@ -457,20 +461,20 @@ static void si_sqtt_start(struct si_context *sctx, int family, si_emit_spm_setup(sctx, cs); } - si_emit_sqtt_start(sctx, cs, family); + si_emit_sqtt_start(sctx, cs, ip_type); if (sctx->spm.bo) si_pc_emit_spm_start(cs); } -static void si_sqtt_stop(struct si_context *sctx, int family, - struct radeon_cmdbuf *cs) +static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs) { struct radeon_winsys *ws = sctx->ws; + enum amd_ip_type ip_type = sctx->ws->cs_get_ip_type(cs); radeon_begin(cs); - switch (family) { + switch (ip_type) { case AMD_IP_GFX: radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(CC0_UPDATE_LOAD_ENABLES(1)); @@ -480,6 +484,9 @@ static void si_sqtt_stop(struct si_context *sctx, int family, radeon_emit(PKT3(PKT3_NOP, 0, 0)); radeon_emit(0); break; + default: + /* Unsupported. 
*/ + assert(false); } radeon_end(); @@ -503,7 +510,7 @@ static void si_sqtt_stop(struct si_context *sctx, int family, SI_CONTEXT_PFP_SYNC_ME; sctx->emit_cache_flush(sctx, cs); - si_emit_sqtt_stop(sctx, cs, family); + si_emit_sqtt_stop(sctx, cs, ip_type); if (sctx->spm.bo) si_pc_emit_spm_reset(cs); @@ -518,40 +525,40 @@ static void si_sqtt_init_cs(struct si_context *sctx) { struct radeon_winsys *ws = sctx->ws; - /* Thread trace start CS (only handles AMD_IP_GFX). */ - sctx->sqtt->start_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf); - if (!ws->cs_create(sctx->sqtt->start_cs[AMD_IP_GFX], sctx->ctx, AMD_IP_GFX, - NULL, NULL)) { - free(sctx->sqtt->start_cs[AMD_IP_GFX]); - sctx->sqtt->start_cs[AMD_IP_GFX] = NULL; - return; - } - - si_sqtt_start(sctx, AMD_IP_GFX, sctx->sqtt->start_cs[AMD_IP_GFX]); + for (unsigned i = 0; i < ARRAY_SIZE(sctx->sqtt->start_cs); i++) { + sctx->sqtt->start_cs[i] = CALLOC_STRUCT(radeon_cmdbuf); + if (!ws->cs_create(sctx->sqtt->start_cs[i], sctx->ctx, (enum amd_ip_type)i, + NULL, NULL)) { + free(sctx->sqtt->start_cs[i]); + sctx->sqtt->start_cs[i] = NULL; + return; + } + si_sqtt_start(sctx, sctx->sqtt->start_cs[i]); + + sctx->sqtt->stop_cs[i] = CALLOC_STRUCT(radeon_cmdbuf); + if (!ws->cs_create(sctx->sqtt->stop_cs[i], sctx->ctx, (enum amd_ip_type)i, + NULL, NULL)) { + ws->cs_destroy(sctx->sqtt->start_cs[i]); + free(sctx->sqtt->start_cs[i]); + sctx->sqtt->start_cs[i] = NULL; + free(sctx->sqtt->stop_cs[i]); + sctx->sqtt->stop_cs[i] = NULL; + return; + } - /* Thread trace stop CS. 
*/ - sctx->sqtt->stop_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf); - if (!ws->cs_create(sctx->sqtt->stop_cs[AMD_IP_GFX], sctx->ctx, AMD_IP_GFX, - NULL, NULL)) { - free(sctx->sqtt->start_cs[AMD_IP_GFX]); - sctx->sqtt->start_cs[AMD_IP_GFX] = NULL; - free(sctx->sqtt->stop_cs[AMD_IP_GFX]); - sctx->sqtt->stop_cs[AMD_IP_GFX] = NULL; - return; + si_sqtt_stop(sctx, sctx->sqtt->stop_cs[i]); } - - si_sqtt_stop(sctx, AMD_IP_GFX, sctx->sqtt->stop_cs[AMD_IP_GFX]); } static void si_begin_sqtt(struct si_context *sctx, struct radeon_cmdbuf *rcs) { - struct radeon_cmdbuf *cs = sctx->sqtt->start_cs[AMD_IP_GFX]; + struct radeon_cmdbuf *cs = sctx->sqtt->start_cs[sctx->ws->cs_get_ip_type(rcs)]; sctx->ws->cs_flush(cs, 0, NULL); } static void si_end_sqtt(struct si_context *sctx, struct radeon_cmdbuf *rcs) { - struct radeon_cmdbuf *cs = sctx->sqtt->stop_cs[AMD_IP_GFX]; + struct radeon_cmdbuf *cs = sctx->sqtt->stop_cs[sctx->ws->cs_get_ip_type(rcs)]; sctx->ws->cs_flush(cs, 0, &sctx->last_sqtt_fence); } @@ -668,8 +675,10 @@ void si_destroy_sqtt(struct si_context *sctx) if (sctx->sqtt->trigger_file) free(sctx->sqtt->trigger_file); - sscreen->ws->cs_destroy(sctx->sqtt->start_cs[AMD_IP_GFX]); - sscreen->ws->cs_destroy(sctx->sqtt->stop_cs[AMD_IP_GFX]); + for (int i = 0; i < ARRAY_SIZE(sctx->sqtt->start_cs); i++) { + sscreen->ws->cs_destroy(sctx->sqtt->start_cs[i]); + sscreen->ws->cs_destroy(sctx->sqtt->stop_cs[i]); + } struct rgp_pso_correlation *pso_correlation = &sctx->sqtt->rgp_pso_correlation;
