Module: Mesa Branch: main Commit: e49c413a86aa863ce9b3edf5162517357fbf45fd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e49c413a86aa863ce9b3edf5162517357fbf45fd
Author: Georg Lehmann <[email protected]> Date: Sat Nov 11 10:46:13 2023 +0100 aco: use null operand for SOPK s_waitcnt Both null def and op result in the same correct encoding, but these instructions optionally read a sgpr, so it makes more sense to use an operand. Reviewed-by: Rhys Perry <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26163> --- src/amd/compiler/aco_insert_NOPs.cpp | 10 +++++----- src/amd/compiler/aco_insert_waitcnt.cpp | 7 +++---- src/amd/compiler/aco_lower_to_hw_instr.cpp | 2 +- src/amd/compiler/tests/test_insert_nops.cpp | 30 ++++++++++++++--------------- 4 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 7a679e8d283..2ab28fb95ec 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -1033,7 +1033,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>& /* Reducing lgkmcnt count to 0 always mitigates the hazard. */ const SOPP_instruction& sopp = instr->sopp(); if (sopp.opcode == aco_opcode::s_waitcnt_lgkmcnt) { - if (sopp.imm == 0 && sopp.definitions[0].physReg() == sgpr_null) + if (sopp.imm == 0 && sopp.operands[0].physReg() == sgpr_null) ctx.sgprs_read_by_SMEM.reset(); } else if (sopp.opcode == aco_opcode::s_waitcnt) { wait_imm imm(state.program->gfx_level, instr->sopp().imm); @@ -1048,12 +1048,12 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>& */ if (instr->isVMEM() || instr->isGlobal() || instr->isScratch()) { if (ctx.has_branch_after_DS) - bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0); + bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0); ctx.has_branch_after_VMEM = ctx.has_branch_after_DS = ctx.has_DS = false; ctx.has_VMEM = true; } else if (instr->isDS()) { if (ctx.has_branch_after_VMEM) - bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0); + bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0); ctx.has_branch_after_VMEM = ctx.has_branch_after_DS = ctx.has_VMEM = false; ctx.has_DS = true; } else if (instr_is_branch(instr)) { @@ -1063,7 +1063,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>& } else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) { /* Only s_waitcnt_vscnt can mitigate the hazard */ const SOPK_instruction& sopk = instr->sopk(); - if (sopk.definitions[0].physReg() == sgpr_null && sopk.imm == 0) + if (sopk.operands[0].physReg() == sgpr_null && sopk.imm == 0) ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false; } @@ -1142,7 +1142,7 @@ resolve_all_gfx10(State& state, NOP_ctx_gfx10& ctx, /* LdsBranchVmemWARHazard */ if (ctx.has_VMEM || ctx.has_branch_after_VMEM || ctx.has_DS || ctx.has_branch_after_DS) { - bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0); + bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0); ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false; } diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 56b4613847a..0209f44c377 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -447,8 +447,7 @@ check_instr(wait_ctx& ctx, wait_imm& wait, alu_delay_info& delay, Instruction* i bool parse_wait_instr(wait_ctx& ctx, wait_imm& imm, Instruction* instr) { - if (instr->opcode == aco_opcode::s_waitcnt_vscnt && - instr->definitions[0].physReg() == sgpr_null) { + if (instr->opcode == aco_opcode::s_waitcnt_vscnt && instr->operands[0].physReg() == sgpr_null) { imm.vs = std::min<uint8_t>(imm.vs, instr->sopk().imm); return true; } else if (instr->opcode == aco_opcode::s_waitcnt) { @@ -995,8 +994,8 @@ emit_waitcnt(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions, wai if (imm.vs != wait_imm::unset_counter) { assert(ctx.gfx_level >= GFX10); SOPK_instruction* waitcnt_vs = - create_instruction<SOPK_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 0, 1); - waitcnt_vs->definitions[0] = Definition(sgpr_null, s1); + create_instruction<SOPK_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 1, 0); + waitcnt_vs->operands[0] = Operand(sgpr_null, s1); waitcnt_vs->imm = imm.vs; instructions.emplace_back(waitcnt_vs); imm.vs = wait_imm::unset_counter; diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index edadca72bb9..a6d7e98a6e1 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2472,7 +2472,7 @@ lower_to_hw_instr(Program* program) * waitcnt insertion doesn't work in a discard early exit block. */ if (program->gfx_level >= GFX10) - bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0); + bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0); wait_imm pops_exit_wait_imm; pops_exit_wait_imm.vm = 0; if (program->has_smem_buffer_or_global_loads) diff --git a/src/amd/compiler/tests/test_insert_nops.cpp b/src/amd/compiler/tests/test_insert_nops.cpp index 245b9aaaf0c..83b0b8d7b59 100644 --- a/src/amd/compiler/tests/test_insert_nops.cpp +++ b/src/amd/compiler/tests/test_insert_nops.cpp @@ -264,11 +264,11 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write) //! p_unit_test 9 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[0] offen - //! s1: %0:null = s_waitcnt_vscnt imm:0 + //! s_waitcnt_vscnt %0:null imm:0 //! s1: %0:s[0] = s_mov_b32 0 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9)); create_mubuf_store(); - bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0); + bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero()); //! p_unit_test 10 @@ -284,12 +284,12 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write) /* VMEM/LDS with the wrong waitcnt in-between */ //! p_unit_test 11 //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offen - //! s1: %0:null = s_waitcnt_vscnt imm:0 + //! s_waitcnt_vscnt %0:null imm:0 //! s_waitcnt_depctr vm_vsrc(0) //! s1: %0:s[0] = s_mov_b32 0 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11)); create_mubuf(0); - bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0); + bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0); bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero()); //! p_unit_test 12 @@ -530,11 +530,11 @@ BEGIN_TEST(insert_nops.lds_direct_vmem) //! p_unit_test 9 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[0] offen - //! s1: %0:null = s_waitcnt_vscnt imm:0 + //! s_waitcnt_vscnt %0:null imm:0 //! v1: %0:v[0] = lds_direct_load %0:m0 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9)); create_mubuf_store(); - bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0); + bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0); bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1)); //! p_unit_test 10 @@ -549,12 +549,12 @@ BEGIN_TEST(insert_nops.lds_direct_vmem) /* VMEM/LDS with the wrong waitcnt in-between */ //! p_unit_test 11 //! v1: %0:v[1] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offen - //! s1: %0:null = s_waitcnt_vscnt imm:0 + //! s_waitcnt_vscnt %0:null imm:0 //! s_waitcnt_depctr vm_vsrc(0) //! v1: %0:v[0] = lds_direct_load %0:m0 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11)); create_mubuf(0, PhysReg(257)); - bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0); + bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0); bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1)); //! p_unit_test 12 @@ -579,12 +579,12 @@ BEGIN_TEST(insert_nops.lds_direct_vmem) //! p_unit_test 14 //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[1], 0 offen - //! s1: %0:null = s_waitcnt_vscnt imm:0 + //! s_waitcnt_vscnt %0:null imm:0 //! s_waitcnt_depctr vm_vsrc(0) //! v1: %0:v[0] = lds_direct_load %0:m0 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14)); create_mubuf(0, PhysReg(256), PhysReg(257)); - bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0); + bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0); bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1)); finish_insert_nops_test(); @@ -1165,12 +1165,12 @@ BEGIN_TEST(insert_nops.setpc_gfx10) /* VMEMtoScalarWriteHazard */ //! p_unit_test 2 //! v1: %0:v[0] = ds_read_b32 %0:v[0] - //! s1: %0:null = s_waitcnt_vscnt imm:0 + //! s_waitcnt_vscnt %0:null imm:0 //! s_waitcnt_depctr vm_vsrc(0) //! s_setpc_b64 0 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1)); - bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), + bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0); /* reset LdsBranchVmemWARHazard */ bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8)); @@ -1188,7 +1188,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10) //! v1: %0:v[0] = ds_read_b32 %0:v[0] //! v_nop //! s_branch - //! s1: %0:null = s_waitcnt_vscnt imm:0 + //! s_waitcnt_vscnt %0:null imm:0 //! s_setpc_b64 0 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1)); @@ -1199,7 +1199,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10) //! p_unit_test 5 //! v1: %0:v[0] = ds_read_b32 %0:v[0] //! v_nop - //! s1: %0:null = s_waitcnt_vscnt imm:0 + //! s_waitcnt_vscnt %0:null imm:0 //! s_setpc_b64 0 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1)); @@ -1234,7 +1234,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10) //>> p_unit_test 8 //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d //! s_waitcnt_depctr vm_vsrc(0) - //! s1: %0:null = s_waitcnt_vscnt imm:0 + //! s_waitcnt_vscnt %0:null imm:0 create_program(GFX10, compute_cs, 64, CHIP_UNKNOWN); bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8)); create_mimg(true, 6, 4);
