From: Nicolai Hähnle <[email protected]>

Signed-off-by: Nicolai Hähnle <[email protected]>
---
 src/app/print_waves.c | 40 +++++++++++++++++++++++++++++++++++++++-
 src/lib/read_gpr.c    | 30 ++++++++++++++++++++++++++++++
 src/umr.h             |  1 +
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index a9aaf39..a72d224 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -29,20 +29,22 @@
 
 #define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | 
", #x, (unsigned)ws.x); 
 #define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx 
| ", #x, (unsigned long)ws.x);
 
 #define H(x) if (col) { printf("\n"); }; col = 0; printf("\n\n%s:\n\t", x);
 #define Hv(x, y) if (col) { printf("\n"); }; col = 0; 
printf("\n\n%s[%08lx]:\n\t", x, (unsigned long)y);
 
 void umr_print_waves(struct umr_asic *asic)
 {
        uint32_t x, se, sh, cu, simd, wave, sgprs[1024], shift, opcodes[8];
+       uint32_t vgprs[64 * 256];
+       uint32_t thread;
        uint64_t pgm_addr;
        struct umr_wave_status ws;
        int first = 1, col = 0;
 
        if (asic->options.halt_waves)
                umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT);
 
        if (asic->family <= FAMILY_CIK)
                shift = 3;  // on SI..CIK allocations were done in 8-dword 
blocks
        else
@@ -50,24 +52,36 @@ void umr_print_waves(struct umr_asic *asic)
 
        for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
        for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
        for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
                umr_get_wave_sq_info(asic, se, sh, cu, &ws);
                if (ws.sq_info.busy) {
                        for (simd = 0; simd < 4; simd++)
                        for (wave = 0; wave < 10; wave++) { //both simd/wave 
are hard coded at the moment...
                                umr_get_wave_status(asic, se, sh, cu, simd, 
wave, &ws);
                                if (ws.wave_status.halt || 
ws.wave_status.valid) {
+                                       unsigned have_vgprs = 0;
+
                                        // grab sgprs..
-                                       if (ws.wave_status.halt)
+                                       if (ws.wave_status.halt) {
                                                umr_read_sgprs(asic, &ws, 
&sgprs[0]);
 
+                                               if (options.bitfields) {
+                                                       have_vgprs = 1;
+                                                       for (thread = 0; thread 
< 64; ++thread) {
+                                                               if 
(umr_read_vgprs(asic, &ws, thread,
+                                                                               
   &vgprs[256 * thread]) < 0)
+                                                                       
have_vgprs = 0;
+                                                       }
+                                               }
+                                       }
+
                                        if (!options.bitfields && first) {
                                                first = 0;
                                                printf("SE SH CU SIMD WAVE# 
WAVE_STATUS PC_HI PC_LO INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC 
LDSALLOC TRAPSTS IBSTS TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
                                        }
                                        if (!options.bitfields) {
                                        printf(
 "%u %u %u %u %u " // se/sh/cu/simd/wave
 "%08lx %08lx %08lx " // wave_status pc/hi/lo
 "%08lx %08lx %08lx %08lx " // inst0/1 exec hi/lo
 "%08lx %08lx %08lx %08lx %08lx " // HW_ID GPR/LDSALLOC TRAP/IB STS
@@ -164,20 +178,44 @@ void umr_print_waves(struct umr_asic *asic)
                                                        for (x = 0; x < 
((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
                                                                
printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
                                                                        
(unsigned)(x),
                                                                        
(unsigned)(x + 3),
                                                                        
(unsigned long)sgprs[x],
                                                                        
(unsigned long)sgprs[x+1],
                                                                        
(unsigned long)sgprs[x+2],
                                                                        
(unsigned long)sgprs[x+3]);
                                                }
 
+
+                                               if (have_vgprs) {
+                                                       printf("\n");
+                                                       for (x = 0; x < 
((ws.gpr_alloc.vgpr_size + 1) << 2); ++x) {
+                                                               if (x % 16 == 
0) {
+                                                                       if (x 
== 0)
+                                                                               
printf("VGPRS:       ");
+                                                                       else
+                                                                               
printf("             ");
+                                                                       for 
(thread = 0; thread < 64; ++thread) {
+                                                                               
unsigned live = thread < 32 ? (ws.exec_lo & (1u << thread))
+                                                                               
                            : (ws.exec_hi & (1u << (thread - 32)));
+                                                                               
printf(live ? " t%02u     " : " (t%02u)   ", thread);
+                                                                       }
+                                                                       
printf("\n");
+                                                               }
+
+                                                               printf("    
[%3u] = {", x);
+                                                               for (thread = 
0; thread < 64; ++thread)
+                                                                       
printf(" %08x", vgprs[thread * 256 + x]);
+                                                               printf(" }\n");
+                                                       }
+                                               }
+
                                                printf("\n\nPGM_MEM:\n");
                                                pgm_addr = (((uint64_t)ws.pc_hi 
<< 32) | ws.pc_lo) - (sizeof(opcodes)/2);
                                                umr_read_vram(asic, 
ws.hw_id.vm_id, pgm_addr, sizeof(opcodes), opcodes);
                                                for (x = 0; x < 
sizeof(opcodes)/4; x++) {
                                                        if (x == 
(sizeof(opcodes)/8))
                                                                printf("*\t");
                                                        else
                                                                printf("\t");
                                                        printf("pgm[%lu@%llx] = 
%08lx\n",
                                                                (unsigned 
long)ws.hw_id.vm_id,
diff --git a/src/lib/read_gpr.c b/src/lib/read_gpr.c
index 427cfc5..669a49b 100644
--- a/src/lib/read_gpr.c
+++ b/src/lib/read_gpr.c
@@ -74,10 +74,40 @@ int umr_read_sgprs(struct umr_asic *asic, struct 
umr_wave_status *ws, uint32_t *
                lseek(asic->fd.gpr, addr, SEEK_SET);
                return read(asic->fd.gpr, dst, 4 * ((ws->gpr_alloc.sgpr_size + 
1) << shift));
        } else {
                umr_grbm_select_index(asic, ws->hw_id.se_id, ws->hw_id.sh_id, 
ws->hw_id.cu_id);
                wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, 
ws->hw_id.wave_id, 0, 0,
                                        (ws->gpr_alloc.sgpr_size + 1) << shift, 
dst);
                umr_grbm_select_index(asic, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
                return 0;
        }
 }
+
+// Read one thread's VGPRs of the wave described by ws into dst, which must hold (vgpr_size + 1) << 2 dwords; returns -1 if asic->family < FAMILY_AI, read()'s result on the kernel path, 0 on the no_kernel path.
+int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t 
thread, uint32_t *dst)
+{
+       uint64_t addr; // seek offset passed to lseek() on asic->fd.gpr
+
+       if (asic->family < FAMILY_AI) // families below FAMILY_AI are rejected up front
+               return -1;
+
+       if (!asic->options.no_kernel) { // kernel path: seek + read on asic->fd.gpr
+               addr = // OR together the wave's hw_id fields and the thread index
+                       (0ULL << 60)                             | // reading 
VGPRs
+                       ((uint64_t)0)                            | // starting 
address to read from
+                       ((uint64_t)ws->hw_id.se_id << 12)        |
+                       ((uint64_t)ws->hw_id.sh_id << 20)        |
+                       ((uint64_t)ws->hw_id.cu_id << 28)        |
+                       ((uint64_t)ws->hw_id.wave_id << 36)      |
+                       ((uint64_t)ws->hw_id.simd_id << 44)      |
+                       ((uint64_t)thread << 52);
+
+               lseek(asic->fd.gpr, addr, SEEK_SET); // NOTE(review): lseek() result is not checked
+               return read(asic->fd.gpr, dst, 4 * ((ws->gpr_alloc.vgpr_size + 
1) << 2)); // requests 4 bytes per dword; read()'s result is returned unchanged, so a short read is not treated as an error
+       } else { // no_kernel path: go through wave_read_regs_via_mmio() instead of fd.gpr
+               umr_grbm_select_index(asic, ws->hw_id.se_id, ws->hw_id.sh_id, 
ws->hw_id.cu_id);
+               wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, 
ws->hw_id.wave_id, thread, 0x400, // NOTE(review): umr_read_sgprs passes 0, 0 here; 0x400 presumably is the VGPR start offset -- confirm
+                                       (ws->gpr_alloc.vgpr_size + 1) << 2, 
dst);
+               umr_grbm_select_index(asic, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); // presumably resets the selection to broadcast -- TODO confirm
+               return 0; // this path always reports success
+       }
+}
diff --git a/src/umr.h b/src/umr.h
index e49c80c..3d2252e 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -494,20 +494,21 @@ void umr_free_asic(struct umr_asic *asic);
 void umr_free_maps(struct umr_asic *asic);
 void umr_close_asic(struct umr_asic *asic); // call this to close a fully open 
asic
 int umr_query_drm(struct umr_asic *asic, int field, void *ret, int size);
 void umr_enumerate_devices(void);
 int umr_update(struct umr_asic *asic, char *script);
 
 /* lib helpers */
 int umr_get_wave_status(struct umr_asic *asic, unsigned se, unsigned sh, 
unsigned cu, unsigned simd, unsigned wave, struct umr_wave_status *ws);
 int umr_get_wave_sq_info(struct umr_asic *asic, unsigned se, unsigned sh, 
unsigned cu, struct umr_wave_status *ws);
 int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t 
*dst);
+int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t 
thread, uint32_t *dst);
 int umr_read_sensor(struct umr_asic *asic, int sensor, void *dst, int *size);
 
 /* mmio helpers */
 // init the mmio lookup table
 int umr_create_mmio_accel(struct umr_asic *asic);
 
 // find the word address of a register
 uint32_t umr_find_reg(struct umr_asic *asic, char *regname);
 
 // find the register data for a register
-- 
2.11.0

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to