Module: Mesa
Branch: main
Commit: e49c413a86aa863ce9b3edf5162517357fbf45fd
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e49c413a86aa863ce9b3edf5162517357fbf45fd

Author: Georg Lehmann <[email protected]>
Date:   Sat Nov 11 10:46:13 2023 +0100

aco: use null operand for SOPK s_waitcnt

Both null def and op result in the same correct encoding, but these
instructions optionally read a sgpr, so it makes more sense to use an operand.

Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26163>

---

 src/amd/compiler/aco_insert_NOPs.cpp        | 10 +++++-----
 src/amd/compiler/aco_insert_waitcnt.cpp     |  7 +++----
 src/amd/compiler/aco_lower_to_hw_instr.cpp  |  2 +-
 src/amd/compiler/tests/test_insert_nops.cpp | 30 ++++++++++++++---------------
 4 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/src/amd/compiler/aco_insert_NOPs.cpp 
b/src/amd/compiler/aco_insert_NOPs.cpp
index 7a679e8d283..2ab28fb95ec 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -1033,7 +1033,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& 
ctx, aco_ptr<Instruction>&
          /* Reducing lgkmcnt count to 0 always mitigates the hazard. */
          const SOPP_instruction& sopp = instr->sopp();
          if (sopp.opcode == aco_opcode::s_waitcnt_lgkmcnt) {
-            if (sopp.imm == 0 && sopp.definitions[0].physReg() == sgpr_null)
+            if (sopp.imm == 0 && sopp.operands[0].physReg() == sgpr_null)
                ctx.sgprs_read_by_SMEM.reset();
          } else if (sopp.opcode == aco_opcode::s_waitcnt) {
             wait_imm imm(state.program->gfx_level, instr->sopp().imm);
@@ -1048,12 +1048,12 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& 
ctx, aco_ptr<Instruction>&
     */
    if (instr->isVMEM() || instr->isGlobal() || instr->isScratch()) {
       if (ctx.has_branch_after_DS)
-         bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
+         bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
       ctx.has_branch_after_VMEM = ctx.has_branch_after_DS = ctx.has_DS = false;
       ctx.has_VMEM = true;
    } else if (instr->isDS()) {
       if (ctx.has_branch_after_VMEM)
-         bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
+         bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
       ctx.has_branch_after_VMEM = ctx.has_branch_after_DS = ctx.has_VMEM = 
false;
       ctx.has_DS = true;
    } else if (instr_is_branch(instr)) {
@@ -1063,7 +1063,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& 
ctx, aco_ptr<Instruction>&
    } else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {
       /* Only s_waitcnt_vscnt can mitigate the hazard */
       const SOPK_instruction& sopk = instr->sopk();
-      if (sopk.definitions[0].physReg() == sgpr_null && sopk.imm == 0)
+      if (sopk.operands[0].physReg() == sgpr_null && sopk.imm == 0)
          ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = 
ctx.has_branch_after_DS = false;
    }
 
@@ -1142,7 +1142,7 @@ resolve_all_gfx10(State& state, NOP_ctx_gfx10& ctx,
 
    /* LdsBranchVmemWARHazard */
    if (ctx.has_VMEM || ctx.has_branch_after_VMEM || ctx.has_DS || 
ctx.has_branch_after_DS) {
-      bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
+      bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
       ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = 
ctx.has_branch_after_DS = false;
    }
 
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp 
b/src/amd/compiler/aco_insert_waitcnt.cpp
index 56b4613847a..0209f44c377 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -447,8 +447,7 @@ check_instr(wait_ctx& ctx, wait_imm& wait, alu_delay_info& 
delay, Instruction* i
 bool
 parse_wait_instr(wait_ctx& ctx, wait_imm& imm, Instruction* instr)
 {
-   if (instr->opcode == aco_opcode::s_waitcnt_vscnt &&
-       instr->definitions[0].physReg() == sgpr_null) {
+   if (instr->opcode == aco_opcode::s_waitcnt_vscnt && 
instr->operands[0].physReg() == sgpr_null) {
       imm.vs = std::min<uint8_t>(imm.vs, instr->sopk().imm);
       return true;
    } else if (instr->opcode == aco_opcode::s_waitcnt) {
@@ -995,8 +994,8 @@ emit_waitcnt(wait_ctx& ctx, 
std::vector<aco_ptr<Instruction>>& instructions, wai
    if (imm.vs != wait_imm::unset_counter) {
       assert(ctx.gfx_level >= GFX10);
       SOPK_instruction* waitcnt_vs =
-         create_instruction<SOPK_instruction>(aco_opcode::s_waitcnt_vscnt, 
Format::SOPK, 0, 1);
-      waitcnt_vs->definitions[0] = Definition(sgpr_null, s1);
+         create_instruction<SOPK_instruction>(aco_opcode::s_waitcnt_vscnt, 
Format::SOPK, 1, 0);
+      waitcnt_vs->operands[0] = Operand(sgpr_null, s1);
       waitcnt_vs->imm = imm.vs;
       instructions.emplace_back(waitcnt_vs);
       imm.vs = wait_imm::unset_counter;
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp 
b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index edadca72bb9..a6d7e98a6e1 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2472,7 +2472,7 @@ lower_to_hw_instr(Program* program)
                       * waitcnt insertion doesn't work in a discard early exit 
block.
                       */
                      if (program->gfx_level >= GFX10)
-                        bld.sopk(aco_opcode::s_waitcnt_vscnt, 
Definition(sgpr_null, s1), 0);
+                        bld.sopk(aco_opcode::s_waitcnt_vscnt, 
Operand(sgpr_null, s1), 0);
                      wait_imm pops_exit_wait_imm;
                      pops_exit_wait_imm.vm = 0;
                      if (program->has_smem_buffer_or_global_loads)
diff --git a/src/amd/compiler/tests/test_insert_nops.cpp 
b/src/amd/compiler/tests/test_insert_nops.cpp
index 245b9aaaf0c..83b0b8d7b59 100644
--- a/src/amd/compiler/tests/test_insert_nops.cpp
+++ b/src/amd/compiler/tests/test_insert_nops.cpp
@@ -264,11 +264,11 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
 
    //! p_unit_test 9
    //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[0] offen
-   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_vscnt %0:null imm:0
    //! s1: %0:s[0] = s_mov_b32 0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9));
    create_mubuf_store();
-   bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
+   bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), 
Operand::zero());
 
    //! p_unit_test 10
@@ -284,12 +284,12 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
    /* VMEM/LDS with the wrong waitcnt in-between */
    //! p_unit_test 11
    //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offen
-   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_vscnt %0:null imm:0
    //! s_waitcnt_depctr vm_vsrc(0)
    //! s1: %0:s[0] = s_mov_b32 0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11));
    create_mubuf(0);
-   bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
+   bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), 
Operand::zero());
 
    //! p_unit_test 12
@@ -530,11 +530,11 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
 
    //! p_unit_test 9
    //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[0] offen
-   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_vscnt %0:null imm:0
    //! v1: %0:v[0] = lds_direct_load %0:m0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9));
    create_mubuf_store();
-   bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
+   bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
    bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), 
Operand(m0, s1));
 
    //! p_unit_test 10
@@ -549,12 +549,12 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
    /* VMEM/LDS with the wrong waitcnt in-between */
    //! p_unit_test 11
    //! v1: %0:v[1] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offen
-   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_vscnt %0:null imm:0
    //! s_waitcnt_depctr vm_vsrc(0)
    //! v1: %0:v[0] = lds_direct_load %0:m0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11));
    create_mubuf(0, PhysReg(257));
-   bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
+   bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
    bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), 
Operand(m0, s1));
 
    //! p_unit_test 12
@@ -579,12 +579,12 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
 
    //! p_unit_test 14
    //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[1], 0 offen
-   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_vscnt %0:null imm:0
    //! s_waitcnt_depctr vm_vsrc(0)
    //! v1: %0:v[0] = lds_direct_load %0:m0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14));
    create_mubuf(0, PhysReg(256), PhysReg(257));
-   bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
+   bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
    bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), 
Operand(m0, s1));
 
    finish_insert_nops_test();
@@ -1165,12 +1165,12 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
    /* VMEMtoScalarWriteHazard */
    //! p_unit_test 2
    //! v1: %0:v[0] = ds_read_b32 %0:v[0]
-   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_vscnt %0:null imm:0
    //! s_waitcnt_depctr vm_vsrc(0)
    //! s_setpc_b64 0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
    bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), 
Operand(PhysReg(256), v1));
-   bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1),
+   bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1),
             0); /* reset LdsBranchVmemWARHazard */
    bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
 
@@ -1188,7 +1188,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
    //! v1: %0:v[0] = ds_read_b32 %0:v[0]
    //! v_nop
    //! s_branch
-   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_vscnt %0:null imm:0
    //! s_setpc_b64 0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
    bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), 
Operand(PhysReg(256), v1));
@@ -1199,7 +1199,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
    //! p_unit_test 5
    //! v1: %0:v[0] = ds_read_b32 %0:v[0]
    //! v_nop
-   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_vscnt %0:null imm:0
    //! s_setpc_b64 0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
    bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), 
Operand(PhysReg(256), v1));
@@ -1234,7 +1234,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
    //>> p_unit_test 8
    //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3],  v1: undef, %0:v[0], 
%0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d
    //! s_waitcnt_depctr vm_vsrc(0)
-   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_vscnt %0:null imm:0
    create_program(GFX10, compute_cs, 64, CHIP_UNKNOWN);
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8));
    create_mimg(true, 6, 4);

Reply via email to