Signed-off-by: Tom St Denis <[email protected]>
---
 doc/sphinx/source/libwave_status.rst |  28 +++
 src/app/print_waves.c                | 379 +++++++++++++++++------------------
 src/lib/CMakeLists.txt               |   1 +
 src/lib/scan_waves.c                 |  97 +++++++++
 src/umr.h                            |   9 +
 5 files changed, 314 insertions(+), 200 deletions(-)
 create mode 100644 src/lib/scan_waves.c

diff --git a/doc/sphinx/source/libwave_status.rst b/doc/sphinx/source/libwave_status.rst
index 0f58a3c91855..2e16a4ac0cb6 100644
--- a/doc/sphinx/source/libwave_status.rst
+++ b/doc/sphinx/source/libwave_status.rst
@@ -43,6 +43,34 @@ can be read with the following function:
 This will populate many of the fields of the structure 'umr_wave_status'.  An
 example of reading them can be found in src/app/print_waves.c.
 
+---------------------
+Scanning Halted Waves
+---------------------
+
+If the waves have been halted (say with the function umr_sq_cmd_halt_waves()) then
+a list of halted valid waves can be made with the following function:
+
+
+::
+
+       struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic)
+
+This will return NULL on error (or no halted waves) or a pointer
+to the following structure:
+
+::
+
+       struct umr_wave_data {
+               uint32_t vgprs[64 * 256], sgprs[1024];
+               int se, sh, cu, simd, wave, have_vgprs;
+               struct umr_wave_status ws;
+               struct umr_wave_thread *threads;
+               struct umr_wave_data *next;
+       };
+
+The list of waves is stored as a linked list; the last node is
+marked by its 'next' pointer being NULL.
+
 ------------
 Reading GPRs
 ------------
diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index 563fc65bedb7..d901bc902ff3 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -24,11 +24,11 @@
  */
 #include "umrapp.h"
 
-#define PP(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: 
%8u | ", #y, (unsigned)ws.x.y); 
-#define PX(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: 
%08lx | ", #y, (unsigned long)ws.x.y);
+#define PP(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: 
%8u | ", #y, (unsigned)wd->ws.x.y);
+#define PX(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: 
%08lx | ", #y, (unsigned long)wd->ws.x.y);
 
-#define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | 
", #x, (unsigned)ws.x); 
-#define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx 
| ", #x, (unsigned long)ws.x);
+#define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | 
", #x, (unsigned)wd->ws.x);
+#define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx 
| ", #x, (unsigned long)wd->ws.x);
 
 #define H(x) if (col) { printf("\n"); }; col = 0; printf("\n\n%s:\n\t", x);
 #define Hv(x, y) if (col) { printf("\n"); }; col = 0; 
printf("\n\n%s[%08lx]:\n\t", x, (unsigned long)y);
@@ -37,11 +37,9 @@
 
 void umr_print_waves(struct umr_asic *asic)
 {
-       uint32_t x, y, se, sh, cu, simd, wave, sgprs[1024], shift;
-       uint32_t vgprs[64 * 256];
-       uint32_t thread;
+       uint32_t x, y, shift, thread;
        uint64_t pgm_addr;
-       struct umr_wave_status ws;
+       struct umr_wave_data *wd, *owd;
        int first = 1, col = 0;
 
        if (asic->options.halt_waves)
@@ -52,228 +50,209 @@ void umr_print_waves(struct umr_asic *asic)
        else
                shift = 4;  // on VI allocations are in 16-dword blocks
 
-       for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
-       for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
-       for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
-               umr_get_wave_sq_info(asic, se, sh, cu, &ws);
-               if (ws.sq_info.busy) {
-                       for (simd = 0; simd < 4; simd++)
-                       for (wave = 0; wave < 10; wave++) { //both simd/wave 
are hard coded at the moment...
-                               umr_get_wave_status(asic, se, sh, cu, simd, 
wave, &ws);
-                               if (ws.wave_status.halt || 
ws.wave_status.valid) {
-                                       unsigned have_vgprs = 0;
-
-                                       // grab sgprs..
-                                       if (ws.wave_status.halt) {
-                                               umr_read_sgprs(asic, &ws, 
&sgprs[0]);
-
-                                               if (asic->options.bitfields) {
-                                                       have_vgprs = 1;
-                                                       for (thread = 0; thread 
< 64; ++thread) {
-                                                               if 
(umr_read_vgprs(asic, &ws, thread,
-                                                                               
   &vgprs[256 * thread]) < 0)
-                                                                       
have_vgprs = 0;
-                                                       }
-                                               }
-                                       }
-
-                                       if (!asic->options.bitfields && first) {
-                                               first = 0;
-                                               printf("SE SH CU SIMD WAVE# 
WAVE_STATUS PC_HI PC_LO INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC 
LDSALLOC TRAPSTS IBSTS TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
-                                       }
-                                       if (!asic->options.bitfields) {
-                                       printf(
+       owd = wd = umr_scan_wave_data(asic);
+       while (wd) {
+               if (!asic->options.bitfields && first) {
+                       first = 0;
+                       printf("SE SH CU SIMD WAVE# WAVE_STATUS PC_HI PC_LO 
INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC LDSALLOC TRAPSTS IBSTS TBA_HI 
TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
+               }
+               if (!asic->options.bitfields) {
+               printf(
 "%u %u %u %u %u " // se/sh/cu/simd/wave
 "%08lx %08lx %08lx " // wave_status pc/hi/lo
 "%08lx %08lx %08lx %08lx " // inst0/1 exec hi/lo
 "%08lx %08lx %08lx %08lx %08lx " // HW_ID GPR/LDSALLOC TRAP/IB STS
 "%08lx %08lx %08lx %08lx %08lx %08lx " // TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 
M0\n");
 "\n",
-(unsigned)se, (unsigned)sh, (unsigned)cu, (unsigned)ws.hw_id.simd_id, 
(unsigned)ws.hw_id.wave_id,
-(unsigned long)ws.wave_status.value, (unsigned long)ws.pc_hi, (unsigned 
long)ws.pc_lo,
-(unsigned long)ws.wave_inst_dw0, (unsigned long)ws.wave_inst_dw1, (unsigned 
long)ws.exec_hi, (unsigned long)ws.exec_lo,
-(unsigned long)ws.hw_id.value, (unsigned long)ws.gpr_alloc.value, (unsigned 
long)ws.lds_alloc.value, (unsigned long)ws.trapsts.value, (unsigned 
long)ws.ib_sts.value,
-(unsigned long)ws.tba_hi, (unsigned long)ws.tba_lo, (unsigned long)ws.tma_hi, 
(unsigned long)ws.tma_lo, (unsigned long)ws.ib_dbg0, (unsigned long)ws.m0
+(unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, 
(unsigned)wd->ws.hw_id.simd_id, (unsigned)wd->ws.hw_id.wave_id,
+(unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, 
(unsigned long)wd->ws.pc_lo,
+(unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.wave_inst_dw1, 
(unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo,
+(unsigned long)wd->ws.hw_id.value, (unsigned long)wd->ws.gpr_alloc.value, 
(unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value, 
(unsigned long)wd->ws.ib_sts.value,
+(unsigned long)wd->ws.tba_hi, (unsigned long)wd->ws.tba_lo, (unsigned 
long)wd->ws.tma_hi, (unsigned long)wd->ws.tma_lo, (unsigned 
long)wd->ws.ib_dbg0, (unsigned long)wd->ws.m0
 );
-                                               if (ws.wave_status.halt) {
-                                                       for (x = 0; x < 
((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
-                                                               
printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
-                                                                       
(unsigned)(x),
-                                                                       
(unsigned)(x + 3),
-                                                                       
(unsigned long)sgprs[x],
-                                                                       
(unsigned long)sgprs[x+1],
-                                                                       
(unsigned long)sgprs[x+2],
-                                                                       
(unsigned long)sgprs[x+3]);
-
-                                                       if 
(ws.wave_status.trap_en || ws.wave_status.priv) {
-                                                               for (y = 0, x = 
0x6C; x < (16 + 0x6C); x += 4) {
-                                                                       
printf(">%s[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
-                                                                               
(x < (0x6C + 4) && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP",
-                                                                               
(unsigned)(y),
-                                                                               
(unsigned)(y + 3),
-                                                                               
(unsigned long)sgprs[x],
-                                                                               
(unsigned long)sgprs[x+1],
-                                                                               
(unsigned long)sgprs[x+2],
-                                                                               
(unsigned long)sgprs[x+3]);
+                       if (wd->ws.wave_status.halt) {
+                               for (x = 0; x < ((wd->ws.gpr_alloc.sgpr_size + 
1) << shift); x += 4)
+                                       printf(">SGPRS[%u..%u] = { %08lx, 
%08lx, %08lx, %08lx }\n",
+                                               (unsigned)(x),
+                                               (unsigned)(x + 3),
+                                               (unsigned long)wd->sgprs[x],
+                                               (unsigned long)wd->sgprs[x+1],
+                                               (unsigned long)wd->sgprs[x+2],
+                                               (unsigned long)wd->sgprs[x+3]);
 
-                                                                       // 
restart numbering on SI..VI with TTMP0
-                                                                       y += 4;
-                                                                       if (x 
== 0x6C && asic->family <= FAMILY_VI)
-                                                                               
y = 0;
-                                                               }
-                                                       }
-                                               }
+                               if (wd->ws.wave_status.trap_en || 
wd->ws.wave_status.priv) {
+                                       for (y = 0, x = 0x6C; x < (16 + 0x6C); 
x += 4) {
+                                               printf(">%s[%u..%u] = { %08lx, 
%08lx, %08lx, %08lx }\n",
+                                                       (x < (0x6C + 4) && 
asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP",
+                                                       (unsigned)(y),
+                                                       (unsigned)(y + 3),
+                                                       (unsigned 
long)wd->sgprs[x],
+                                                       (unsigned 
long)wd->sgprs[x+1],
+                                                       (unsigned 
long)wd->sgprs[x+2],
+                                                       (unsigned 
long)wd->sgprs[x+3]);
 
-                                               pgm_addr = (((uint64_t)ws.pc_hi 
<< 32) | ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
-                                               umr_vm_disasm(asic, 
ws.hw_id.vm_id, pgm_addr, (((uint64_t)ws.pc_hi << 32) | ws.pc_lo), 
NUM_OPCODE_WORDS*4);
-                                       } else {
-                                               first = 0;
-                                               
printf("\n------------------------------------------------------\nse%u.sh%u.cu%u.simd%u.wave%u\n",
-                                               (unsigned)se, (unsigned)sh, 
(unsigned)cu, (unsigned)ws.hw_id.simd_id, (unsigned)ws.hw_id.wave_id);
+                                               // restart numbering on SI..VI 
with TTMP0
+                                               y += 4;
+                                               if (x == 0x6C && asic->family 
<= FAMILY_VI)
+                                                       y = 0;
+                                       }
+                               }
+                       }
 
-                                               H("Main Registers");
-                                               X(pc_hi);
-                                               X(pc_lo);
-                                               X(wave_inst_dw0);
-                                               X(wave_inst_dw1);
-                                               X(exec_hi);
-                                               X(exec_lo);
-                                               X(tba_hi);
-                                               X(tba_lo);
-                                               X(tma_hi);
-                                               X(tma_lo);
-                                               X(m0);
-                                               X(ib_dbg0);
+                       pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | 
wd->ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
+                       umr_vm_disasm(asic, wd->ws.hw_id.vm_id, pgm_addr, 
(((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo), NUM_OPCODE_WORDS*4);
+               } else {
+                       first = 0;
+                       
printf("\n------------------------------------------------------\nse%u.sh%u.cu%u.simd%u.wave%u\n",
+                       (unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, 
(unsigned)wd->ws.hw_id.simd_id, (unsigned)wd->ws.hw_id.wave_id);
 
-                                               Hv("Wave_Status", 
ws.wave_status.value);
-                                               PP(wave_status, scc);
-                                               PP(wave_status, execz);
-                                               PP(wave_status, vccz);
-                                               PP(wave_status, in_tg);
-                                               PP(wave_status, halt);
-                                               PP(wave_status, valid);
-                                               PP(wave_status, spi_prio);
-                                               PP(wave_status, wave_prio);
-                                               PP(wave_status, priv);
-                                               PP(wave_status, trap_en);
-                                               PP(wave_status, trap);
-                                               PP(wave_status, ttrace_en);
-                                               PP(wave_status, export_rdy);
-                                               PP(wave_status, in_barrier);
-                                               PP(wave_status, ecc_err);
-                                               PP(wave_status, skip_export);
-                                               PP(wave_status, perf_en);
-                                               PP(wave_status, cond_dbg_user);
-                                               PP(wave_status, cond_dbg_sys);
-                                               PP(wave_status, data_atc);
-                                               PP(wave_status, inst_atc);
-                                               PP(wave_status, 
dispatch_cache_ctrl);
-                                               PP(wave_status, must_export);
+                       H("Main Registers");
+                       X(pc_hi);
+                       X(pc_lo);
+                       X(wave_inst_dw0);
+                       X(wave_inst_dw1);
+                       X(exec_hi);
+                       X(exec_lo);
+                       X(tba_hi);
+                       X(tba_lo);
+                       X(tma_hi);
+                       X(tma_lo);
+                       X(m0);
+                       X(ib_dbg0);
 
-                                               Hv("HW_ID", ws.hw_id.value);
-                                               PP(hw_id, wave_id);
-                                               PP(hw_id, simd_id);
-                                               PP(hw_id, pipe_id);
-                                               PP(hw_id, cu_id);
-                                               PP(hw_id, sh_id);
-                                               PP(hw_id, se_id);
-                                               PP(hw_id, tg_id);
-                                               PP(hw_id, vm_id);
-                                               PP(hw_id, queue_id);
-                                               PP(hw_id, state_id);
-                                               PP(hw_id, me_id);
+                       Hv("Wave_Status", wd->ws.wave_status.value);
+                       PP(wave_status, scc);
+                       PP(wave_status, execz);
+                       PP(wave_status, vccz);
+                       PP(wave_status, in_tg);
+                       PP(wave_status, halt);
+                       PP(wave_status, valid);
+                       PP(wave_status, spi_prio);
+                       PP(wave_status, wave_prio);
+                       PP(wave_status, priv);
+                       PP(wave_status, trap_en);
+                       PP(wave_status, trap);
+                       PP(wave_status, ttrace_en);
+                       PP(wave_status, export_rdy);
+                       PP(wave_status, in_barrier);
+                       PP(wave_status, ecc_err);
+                       PP(wave_status, skip_export);
+                       PP(wave_status, perf_en);
+                       PP(wave_status, cond_dbg_user);
+                       PP(wave_status, cond_dbg_sys);
+                       PP(wave_status, data_atc);
+                       PP(wave_status, inst_atc);
+                       PP(wave_status, dispatch_cache_ctrl);
+                       PP(wave_status, must_export);
 
-                                               Hv("GPR_ALLOC", 
ws.gpr_alloc.value);
-                                               PP(gpr_alloc, vgpr_base);
-                                               PP(gpr_alloc, vgpr_size);
-                                               PP(gpr_alloc, sgpr_base);
-                                               PP(gpr_alloc, sgpr_size);
+                       Hv("HW_ID", wd->ws.hw_id.value);
+                       PP(hw_id, wave_id);
+                       PP(hw_id, simd_id);
+                       PP(hw_id, pipe_id);
+                       PP(hw_id, cu_id);
+                       PP(hw_id, sh_id);
+                       PP(hw_id, se_id);
+                       PP(hw_id, tg_id);
+                       PP(hw_id, vm_id);
+                       PP(hw_id, queue_id);
+                       PP(hw_id, state_id);
+                       PP(hw_id, me_id);
 
-                                               if (ws.wave_status.halt) {
-                                                       printf("\n\nSGPRS:\n");
-                                                       for (x = 0; x < 
((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
-                                                               
printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
-                                                                       
(unsigned)(x),
-                                                                       
(unsigned)(x + 3),
-                                                                       
(unsigned long)sgprs[x],
-                                                                       
(unsigned long)sgprs[x+1],
-                                                                       
(unsigned long)sgprs[x+2],
-                                                                       
(unsigned long)sgprs[x+3]);
+                       Hv("GPR_ALLOC", wd->ws.gpr_alloc.value);
+                       PP(gpr_alloc, vgpr_base);
+                       PP(gpr_alloc, vgpr_size);
+                       PP(gpr_alloc, sgpr_base);
+                       PP(gpr_alloc, sgpr_size);
 
-                                                       if 
(ws.wave_status.trap_en || ws.wave_status.priv) {
-                                                               for (y  = 0, x 
= 0x6C; x < (16 + 0x6C); x += 4) {
-                                                                       // only 
print label once each
-                                                                       if 
((asic->family <= FAMILY_VI && x < 0x6C + 8) ||
-                                                                               
(asic->family > FAMILY_VI && x < 0x6C + 4))
-                                                                               
printf("\n%s:\n", (x < 0x6C + 4 && asic->family <= FAMILY_VI) ? "TBA/TMA" : 
"TTMP");
-                                                                       
printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
-                                                                               
(unsigned)(y),
-                                                                               
(unsigned)(y + 3),
-                                                                               
(unsigned long)sgprs[x],
-                                                                               
(unsigned long)sgprs[x+1],
-                                                                               
(unsigned long)sgprs[x+2],
-                                                                               
(unsigned long)sgprs[x+3]);
+                       if (wd->ws.wave_status.halt) {
+                               printf("\n\nSGPRS:\n");
+                               for (x = 0; x < ((wd->ws.gpr_alloc.sgpr_size + 
1) << shift); x += 4)
+                                       printf("\t[%4u..%4u] = { %08lx, %08lx, 
%08lx, %08lx }\n",
+                                               (unsigned)(x),
+                                               (unsigned)(x + 3),
+                                               (unsigned long)wd->sgprs[x],
+                                               (unsigned long)wd->sgprs[x+1],
+                                               (unsigned long)wd->sgprs[x+2],
+                                               (unsigned long)wd->sgprs[x+3]);
 
-                                                                       // 
reset count on SI..VI
-                                                                       y += 4;
-                                                                       if (x 
== 0x6C && asic->family <= FAMILY_VI)
-                                                                               
y = 0;
-                                                               }
-                                                       }
-                                               }
+                               if (wd->ws.wave_status.trap_en || 
wd->ws.wave_status.priv) {
+                                       for (y  = 0, x = 0x6C; x < (16 + 0x6C); 
x += 4) {
+                                               // only print label once each
+                                               if ((asic->family <= FAMILY_VI 
&& x < 0x6C + 8) ||
+                                                       (asic->family > 
FAMILY_VI && x < 0x6C + 4))
+                                                       printf("\n%s:\n", (x < 
0x6C + 4 && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP");
+                                               printf("\t[%4u..%4u] = { %08lx, 
%08lx, %08lx, %08lx }\n",
+                                                       (unsigned)(y),
+                                                       (unsigned)(y + 3),
+                                                       (unsigned 
long)wd->sgprs[x],
+                                                       (unsigned 
long)wd->sgprs[x+1],
+                                                       (unsigned 
long)wd->sgprs[x+2],
+                                                       (unsigned 
long)wd->sgprs[x+3]);
 
-                                               if (have_vgprs) {
-                                                       printf("\n");
-                                                       for (x = 0; x < 
((ws.gpr_alloc.vgpr_size + 1) << 2); ++x) {
-                                                               if (x % 16 == 
0) {
-                                                                       if (x 
== 0)
-                                                                               
printf("VGPRS:       ");
-                                                                       else
-                                                                               
printf("             ");
-                                                                       for 
(thread = 0; thread < 64; ++thread) {
-                                                                               
unsigned live = thread < 32 ? (ws.exec_lo & (1u << thread))
-                                                                               
                            : (ws.exec_hi & (1u << (thread - 32)));
-                                                                               
printf(live ? " t%02u     " : " (t%02u)   ", thread);
-                                                                       }
-                                                                       
printf("\n");
-                                                               }
+                                               // reset count on SI..VI
+                                               y += 4;
+                                               if (x == 0x6C && asic->family 
<= FAMILY_VI)
+                                                       y = 0;
+                                       }
+                               }
+                       }
 
-                                                               printf("    
[%3u] = {", x);
-                                                               for (thread = 
0; thread < 64; ++thread)
-                                                                       
printf(" %08x", vgprs[thread * 256 + x]);
-                                                               printf(" }\n");
-                                                       }
+                       if (wd->have_vgprs) {
+                               printf("\n");
+                               for (x = 0; x < ((wd->ws.gpr_alloc.vgpr_size + 
1) << 2); ++x) {
+                                       if (x % 16 == 0) {
+                                               if (x == 0)
+                                                       printf("VGPRS:       ");
+                                               else
+                                                       printf("             ");
+                                               for (thread = 0; thread < 64; 
++thread) {
+                                                       unsigned live = thread 
< 32 ? (wd->ws.exec_lo & (1u << thread))
+                                                                               
        : (wd->ws.exec_hi & (1u << (thread - 32)));
+                                                       printf(live ? " t%02u   
  " : " (t%02u)   ", thread);
                                                }
+                                               printf("\n");
+                                       }
 
-                                               printf("\n\nPGM_MEM:\n");
-                                               pgm_addr = (((uint64_t)ws.pc_hi 
<< 32) | ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
-                                               umr_vm_disasm(asic, 
ws.hw_id.vm_id, pgm_addr, (((uint64_t)ws.pc_hi << 32) | ws.pc_lo), 
NUM_OPCODE_WORDS*4);
+                                       printf("    [%3u] = {", x);
+                                       for (thread = 0; thread < 64; ++thread)
+                                               printf(" %08x", 
wd->vgprs[thread * 256 + x]);
+                                       printf(" }\n");
+                               }
+                       }
 
-                                               Hv("LDS_ALLOC", 
ws.lds_alloc.value);
-                                               PP(lds_alloc, lds_base);
-                                               PP(lds_alloc, lds_size);
+                       printf("\n\nPGM_MEM:\n");
+                       pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | 
wd->ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
+                       umr_vm_disasm(asic, wd->ws.hw_id.vm_id, pgm_addr, 
(((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo), NUM_OPCODE_WORDS*4);
 
-                                               Hv("IB_STS", ws.ib_sts.value);
-                                               PP(ib_sts, vm_cnt);
-                                               PP(ib_sts, exp_cnt);
-                                               PP(ib_sts, lgkm_cnt);
-                                               PP(ib_sts, valu_cnt);
+                       Hv("LDS_ALLOC", wd->ws.lds_alloc.value);
+                       PP(lds_alloc, lds_base);
+                       PP(lds_alloc, lds_size);
 
-                                               Hv("TRAPSTS", ws.trapsts.value);
-                                               PP(trapsts, excp);
-                                               PP(trapsts, excp_cycle);
-                                               PP(trapsts, dp_rate);
+                       Hv("IB_STS", wd->ws.ib_sts.value);
+                       PP(ib_sts, vm_cnt);
+                       PP(ib_sts, exp_cnt);
+                       PP(ib_sts, lgkm_cnt);
+                       PP(ib_sts, valu_cnt);
 
-                                               printf("\n"); col = 0;
-                                       }
+                       Hv("TRAPSTS", wd->ws.trapsts.value);
+                       PP(trapsts, excp);
+                       PP(trapsts, excp_cycle);
+                       PP(trapsts, dp_rate);
 
-                               }
-                       }
+                       printf("\n"); col = 0;
                }
+               wd = wd->next;
        }
        if (first)
                printf("No active waves!\n");
 
+       wd = owd;
+       while (wd) {
+               owd = wd->next;
+               free(wd);
+               wd = owd;
+       }
+
        if (asic->options.halt_waves)
                umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME);
 }
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
index c028c550fa3e..8d5427d63aae 100644
--- a/src/lib/CMakeLists.txt
+++ b/src/lib/CMakeLists.txt
@@ -21,6 +21,7 @@ add_library(umrcore STATIC
   read_vram.c
   ring_decode.c
   scan_config.c
+  scan_waves.c
   sq_cmd_halt_waves.c
   transfer_soc15.c
   wave_status.c
diff --git a/src/lib/scan_waves.c b/src/lib/scan_waves.c
new file mode 100644
index 000000000000..e3e7e131c888
--- /dev/null
+++ b/src/lib/scan_waves.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis <[email protected]>
+ *
+ */
+#include "umr.h"
+
+/* Probe 4 SIMDs x 10 waves on every busy CU of every SE/SH and capture each
+ * halted or valid wave in a calloc'ed list; GPRs are read for halted waves only.
+ * Returns the list head, or NULL if nothing was captured or the first
+ * allocation failed.  The caller frees every node. */
+struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic)
+{
+       uint32_t se, sh, cu, simd, wave, thread;
+       struct umr_wave_data *opwd, *ppwd, *pwd;
+
+       ppwd = opwd = pwd = calloc(1, sizeof *pwd);
+       if (!pwd) {
+               fprintf(stderr, "[ERROR]: Out of memory\n");
+               return NULL;
+       }
+
+       for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
+       for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
+       for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
+               // ensure the wave data is zeroed out if it was forwarded
+               // from a previous iteration
+               memset(&pwd->ws, 0, sizeof(pwd->ws));
+               pwd->se = se;
+               pwd->sh = sh;
+               pwd->cu = cu;
+               umr_get_wave_sq_info(asic, se, sh, cu, &pwd->ws);
+               if (pwd->ws.sq_info.busy) {
+                       for (simd = 0; simd < 4; simd++)
+                       for (wave = 0; wave < 10; wave++) { //both simd/wave are hard coded at the moment...
+                               pwd->simd = simd;
+                               pwd->wave = wave;
+                               umr_get_wave_status(asic, se, sh, cu, simd, wave, &pwd->ws);
+                               if (pwd->ws.wave_status.halt || pwd->ws.wave_status.valid) {
+                                       // grab sgprs/vgprs, but only from halted waves
+                                       if (pwd->ws.wave_status.halt) {
+                                               umr_read_sgprs(asic, &pwd->ws, &pwd->sgprs[0]);
+                                               pwd->have_vgprs = 1; // cleared again if any thread read fails
+                                               for (thread = 0; thread < 64; ++thread) {
+                                                       if (umr_read_vgprs(asic, &pwd->ws, thread,
+                                                                          &pwd->vgprs[256 * thread]) < 0) {
+                                                               pwd->have_vgprs = 0;
+                                                               break;
+                                                       }
+                                               }
+                                       }
+
+                                       pwd->next = calloc(1, sizeof(*pwd));
+                                       if (!pwd->next) {
+                                               fprintf(stderr, "[ERROR]: Out of memory\n");
+                                               return opwd; // partial list; this node is its NULL-terminated tail
+                                       }
+                                       pwd->next->se = pwd->se;
+                                       pwd->next->sh = pwd->sh;
+                                       pwd->next->cu = pwd->cu;
+                                       pwd->next->ws = pwd->ws;
+                                       ppwd = pwd;
+                                       pwd = pwd->next;
+                               }
+                       }
+               }
+       }
+
+       // no waves to capture: only the unused scratch node exists
+       if (opwd == pwd) {
+               free(pwd);
+               return NULL;
+       }
+       // drop tail node (the scratch node that never received a wave)
+       free(ppwd->next);
+       ppwd->next = NULL;
+       return opwd;
+}
diff --git a/src/umr.h b/src/umr.h
index 7154db7bb2c3..e99ee965527e 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -362,6 +362,14 @@ struct umr_wave_status {
        } trapsts;
 };
 
+struct umr_wave_data { // one captured wave; node of the list built by umr_scan_wave_data()
+       uint32_t vgprs[64 * 256], sgprs[1024]; // vgprs: one 256-word slot per thread, 64 threads
+       int se, sh, cu, simd, wave, have_vgprs; // have_vgprs: 1 only if the wave was halted and all 64 vgpr reads succeeded
+       struct umr_wave_status ws; // filled by umr_get_wave_sq_info() and umr_get_wave_status()
+       struct umr_wave_thread *threads; // not assigned by umr_scan_wave_data(); left zeroed by calloc
+       struct umr_wave_data *next; // next captured wave, NULL on the last node
+};
+
 struct umr_shaders_pgm {
        // VMID and length in bytes
        uint32_t
@@ -561,6 +569,7 @@ int umr_update(struct umr_asic *asic, char *script);
 
 /* lib helpers */
 int umr_get_wave_status(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, unsigned simd, unsigned wave, struct umr_wave_status *ws);
+struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic);
 int umr_get_wave_sq_info(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, struct umr_wave_status *ws);
 int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t *dst);
 int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t thread, uint32_t *dst);
-- 
2.14.3

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to