https://github.com/ssahasra created https://github.com/llvm/llvm-project/pull/150391
None >From 3c8c9498861fbb055ffdf4343fd9733840e8bc39 Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe <sameer.sahasrabud...@amd.com> Date: Thu, 24 Jul 2025 14:48:06 +0530 Subject: [PATCH] [AMDGPU] wip: MIR pretty printing for S_WAITCNT_FENCE_soft --- llvm/lib/CodeGen/MIRParser/MIParser.cpp | 25 ++- llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp | 161 ++++++++++++++++++ llvm/lib/Target/AMDGPU/SIDefines.h | 8 +- .../memory-legalizer-atomic-fence.ll | 72 ++++---- llvm/test/CodeGen/AMDGPU/fence-parameters.mir | 29 ++++ .../AMDGPU/insert-waitcnts-fence-soft.mir | 18 +- .../CodeGen/AMDGPU/memory-legalizer-local.mir | 24 +-- 7 files changed, 263 insertions(+), 74 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/fence-parameters.mir diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 3a364d5ff0d20..c8ad286a87a35 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1850,28 +1850,25 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) { return false; } +// The target mnemonic is an expression of the form: +// +// Dot(IntegerLiteral|Identifier|Dot)+ +// +// We could be stricter like not terminating in a dot, but that's not important +// where this is being used. bool MIParser::parseTargetImmMnemonic(const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest, const MIRFormatter &MF) { assert(Token.is(MIToken::dot)); auto Loc = Token.location(); // record start position - size_t Len = 1; // for "." - lex(); - - // Handle the case that mnemonic starts with number. 
- if (Token.is(MIToken::IntegerLiteral)) { + size_t Len = 0; + while (Token.is(MIToken::IntegerLiteral) || Token.is(MIToken::dot) || + Token.is(MIToken::Identifier)) { Len += Token.range().size(); lex(); } - - StringRef Src; - if (Token.is(MIToken::comma)) - Src = StringRef(Loc, Len); - else { - assert(Token.is(MIToken::Identifier)); - Src = StringRef(Loc, Len + Token.stringValue().size()); - } + StringRef Src(Loc, Len); int64_t Val; if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val, [this](StringRef::iterator Loc, const Twine &Msg) @@ -1879,8 +1876,6 @@ bool MIParser::parseTargetImmMnemonic(const unsigned OpCode, return true; Dest = MachineOperand::CreateImm(Val); - if (!Token.is(MIToken::comma)) - lex(); return false; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp index 75e3d8c426e73..f318d6ffc1bae 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp @@ -12,10 +12,135 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMIRFormatter.h" +#include "SIDefines.h" #include "SIMachineFunctionInfo.h" using namespace llvm; +bool parseAtomicOrdering(StringRef Src, unsigned &Order) { + Src.consume_front("."); + for (unsigned I = 0; I <= (unsigned)AtomicOrdering::LAST; ++I) { + if (Src == toIRString((AtomicOrdering)I)) { + Order = I; + return true; + } + } + Order = ~0u; + return false; +} + +static const char *fmtScope(unsigned Scope) { + static const char *Names[] = {"none", "singlethread", "wavefront", + "workgroup", "agent", "system"}; + return Names[Scope]; +} + +bool parseAtomicScope(StringRef Src, unsigned &Scope) { + Src.consume_front("."); + for (unsigned I = 0; + I != (unsigned)AMDGPU::SIAtomicScope::NUM_SI_ATOMIC_SCOPES; ++I) { + if (Src == fmtScope(I)) { + Scope = I; + return true; + } + } + Scope = ~0u; + return false; +} + +static const char *fmtAddrSpace(unsigned Space) { + static const char 
*Names[] = {"none", "global", "lds", + "scratch", "gds", "other"}; + return Names[Space]; +} + +bool parseOneAddrSpace(StringRef Src, unsigned &AddrSpace) { + if (Src == "none") { + AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::NONE; + return true; + } + if (Src == "flat") { + AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::FLAT; + return true; + } + if (Src == "atomic") { + AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::ATOMIC; + return true; + } + if (Src == "all") { + AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::ALL; + return true; + } + for (unsigned I = 1, A = 1; A <= (unsigned)AMDGPU::SIAtomicAddrSpace::LAST; + A <<= 1, ++I) { + if (Src == fmtAddrSpace(I)) { + AddrSpace = A; + return true; + } + } + AddrSpace = ~0u; + return false; +} + +bool parseAddrSpace(StringRef Src, unsigned &AddrSpace) { + Src = Src.trim(); + Src.consume_front("."); + while (!Src.empty()) { + auto [First, Rest] = Src.split('.'); + unsigned OneSpace; + if (!parseOneAddrSpace(First, OneSpace)) + return false; + AddrSpace |= OneSpace; + Src = Rest; + } + return true; +} + +static void fmtAddrSpace(raw_ostream &OS, int64_t Imm) { + OS << '.'; + if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::NONE) { + OS << "none"; + return; + } + if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::FLAT) { + OS << "flat"; + return; + } + if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::ATOMIC) { + OS << "atomic"; + return; + } + if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::ALL) { + OS << "all"; + return; + } + + ListSeparator LS{"."}; + auto AddrSpace = (AMDGPU::SIAtomicAddrSpace)Imm; + const auto LAST = (unsigned)AMDGPU::SIAtomicAddrSpace::LAST; + + for (unsigned A = 1, I = 1; A <= LAST; A <<= 1, ++I) { + if (any(AddrSpace & (AMDGPU::SIAtomicAddrSpace)A)) + OS << LS << StringRef(fmtAddrSpace(I)); + } +} + +static void printFenceOperand(raw_ostream &OS, const MachineInstr &MI, + std::optional<unsigned int> OpIdx, int64_t Imm) { +#define GET_IDX(Name) \ + 
AMDGPU::getNamedOperandIdx(AMDGPU::S_WAITCNT_FENCE_soft, AMDGPU::OpName::Name) + if (OpIdx == GET_IDX(Ordering)) { + assert(Imm <= (unsigned)AtomicOrdering::LAST); + OS << '.' << StringRef(toIRString((AtomicOrdering)Imm)); + } else if (OpIdx == GET_IDX(Scope)) { + assert(Imm < (unsigned)AMDGPU::SIAtomicScope::NUM_SI_ATOMIC_SCOPES); + OS << '.' << StringRef(fmtScope(Imm)); + } else if (OpIdx == GET_IDX(AddrSpace)) { + fmtAddrSpace(OS, Imm); + } +#undef GET_IDX +} + void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI, std::optional<unsigned int> OpIdx, int64_t Imm) const { @@ -24,12 +149,46 @@ void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI, assert(OpIdx == 0); printSDelayAluImm(Imm, OS); break; + case AMDGPU::S_WAITCNT_FENCE_soft: + printFenceOperand(OS, MI, OpIdx, Imm); + break; default: MIRFormatter::printImm(OS, MI, OpIdx, Imm); break; } } +static bool +parseFenceParameter(const unsigned int OpIdx, int64_t &Imm, + llvm::StringRef &Src, + llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) { +#define GET_IDX(Name) \ + AMDGPU::getNamedOperandIdx(AMDGPU::S_WAITCNT_FENCE_soft, AMDGPU::OpName::Name) + if (OpIdx == (unsigned)GET_IDX(Ordering)) { + unsigned Order = 0; + if (!parseAtomicOrdering(Src, Order)) + return ErrorCallback(Src.begin(), "Expected atomic ordering"); + Imm = Order; + return false; + } + if (OpIdx == (unsigned)GET_IDX(Scope)) { + unsigned Scope = 0; + if (!parseAtomicScope(Src, Scope)) + return ErrorCallback(Src.begin(), "Expected atomic scope"); + Imm = Scope; + return false; + } + if (OpIdx == (unsigned)GET_IDX(AddrSpace)) { + unsigned AddrSpace = 0; + if (!parseAddrSpace(Src, AddrSpace)) + return ErrorCallback(Src.begin(), "Expected address space"); + Imm = AddrSpace; + return false; + } + return true; +#undef GET_IDX +} + /// Implement target specific parsing of immediate mnemonics. The mnemonic is /// a string with a leading dot. 
bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode, @@ -41,6 +200,8 @@ bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode, switch (OpCode) { case AMDGPU::S_DELAY_ALU: return parseSDelayAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback); + case AMDGPU::S_WAITCNT_FENCE_soft: + return parseFenceParameter(OpIdx, Imm, Src, ErrorCallback); default: break; } diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 9d30951cac1a3..d7c2aff1d3411 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -421,13 +421,16 @@ enum CPol { } // namespace CPol /// The atomic synchronization scopes supported by the AMDGPU target. +// +// Note: Update the strings in AMDGPUMIRFormatter.cpp to match this enum. enum class SIAtomicScope { NONE, SINGLETHREAD, WAVEFRONT, WORKGROUP, AGENT, - SYSTEM + SYSTEM, + NUM_SI_ATOMIC_SCOPES }; /// The distinct address spaces supported by the AMDGPU target for @@ -439,6 +442,7 @@ enum class SIAtomicAddrSpace { SCRATCH = 1u << 2, GDS = 1u << 3, OTHER = 1u << 4, + LAST = OTHER, /// The address spaces that can be accessed by a FLAT instruction. FLAT = GLOBAL | LDS | SCRATCH, @@ -449,7 +453,7 @@ enum class SIAtomicAddrSpace { /// All address spaces. ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER, - LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL) + LLVM_MARK_AS_BITMASK_ENUM(/* Highest bit defined = */ LAST) }; namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns. 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll index 1f01c64de546c..6a14c2c9aae7f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll @@ -536,36 +536,36 @@ entry: define amdgpu_kernel void @workgroup_one_as_release() #0 { ; GFX6-LABEL: name: workgroup_one_as_release ; GFX6: bb.0.entry: - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_release ; GFX8: bb.0.entry: - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_release ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: workgroup_one_as_release ; GFX10CU: bb.0.entry: - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_release ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_one_as_release ; GFX11CU: bb.0.entry: - ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") release @@ -575,38 +575,38 @@ entry: define 
amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; GFX6-LABEL: name: workgroup_one_as_acq_rel ; GFX6: bb.0.entry: - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_acq_rel ; GFX8: bb.0.entry: - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_acq_rel ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: workgroup_one_as_acq_rel ; GFX10CU: bb.0.entry: - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_acq_rel ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_one_as_acq_rel ; GFX11CU: bb.0.entry: - ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") acq_rel @@ -616,38 +616,38 @@ entry: define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; GFX6-LABEL: name: workgroup_one_as_seq_cst ; GFX6: bb.0.entry: - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: 
workgroup_one_as_seq_cst ; GFX8: bb.0.entry: - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_seq_cst ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: workgroup_one_as_seq_cst ; GFX10CU: bb.0.entry: - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_seq_cst ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_one_as_seq_cst ; GFX11CU: bb.0.entry: - ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") seq_cst @@ -1301,39 +1301,39 @@ define amdgpu_kernel void @workgroup_release() #0 { ; GFX6-LABEL: name: workgroup_release ; GFX6: bb.0.entry: ; GFX6-NEXT: S_WAITCNT_soft 127 - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_release ; GFX8: bb.0.entry: ; GFX8-NEXT: S_WAITCNT_soft 127 - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_release ; GFX10WGP: bb.0.entry: ; 
GFX10WGP-NEXT: S_WAITCNT_soft 112 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: workgroup_release ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 49279 - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_release ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_release ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 64519 - ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup") release @@ -1344,19 +1344,19 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; GFX6-LABEL: name: workgroup_acq_rel ; GFX6: bb.0.entry: ; GFX6-NEXT: S_WAITCNT_soft 127 - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_acq_rel ; GFX8: bb.0.entry: ; GFX8-NEXT: S_WAITCNT_soft 127 - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_acq_rel ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 112 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 @@ -1364,13 +1364,13 @@ define 
amdgpu_kernel void @workgroup_acq_rel() #0 { ; GFX10CU-LABEL: name: workgroup_acq_rel ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 49279 - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_acq_rel ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 @@ -1378,7 +1378,7 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; GFX11CU-LABEL: name: workgroup_acq_rel ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 64519 - ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup") acq_rel @@ -1389,19 +1389,19 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; GFX6-LABEL: name: workgroup_seq_cst ; GFX6: bb.0.entry: ; GFX6-NEXT: S_WAITCNT_soft 127 - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_seq_cst ; GFX8: bb.0.entry: ; GFX8-NEXT: S_WAITCNT_soft 127 - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_seq_cst ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 112 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 @@ -1409,13 +1409,13 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; GFX10CU-LABEL: name: 
workgroup_seq_cst ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 49279 - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_seq_cst ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 @@ -1423,7 +1423,7 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; GFX11CU-LABEL: name: workgroup_seq_cst ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 64519 - ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup") seq_cst diff --git a/llvm/test/CodeGen/AMDGPU/fence-parameters.mir b/llvm/test/CodeGen/AMDGPU/fence-parameters.mir new file mode 100644 index 0000000000000..6fc6200518a3a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fence-parameters.mir @@ -0,0 +1,29 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -run-pass none -o - %s | FileCheck %s +# +# This test ensures that the MIR parser parses fence parameters correctly. +# We can see some canonicalization in the roundtrip. 
+ +--- +name: foo +body: | + bb.0.entry: + + ; CHECK-LABEL: name: foo + ; CHECK: S_WAITCNT_FENCE_soft .release, .workgroup, .flat + ; CHECK-NEXT: S_WAITCNT_FENCE_soft .monotonic, .system, .none + ; CHECK-NEXT: S_WAITCNT_FENCE_soft .acquire, .none, .none + ; CHECK-NEXT: S_WAITCNT_FENCE_soft .release, .none, .global.scratch + ; CHECK-NEXT: S_WAITCNT_FENCE_soft .acquire, .agent, .lds + ; CHECK-NEXT: S_WAITCNT_FENCE_soft .not_atomic, .wavefront, .atomic + ; CHECK-NEXT: S_WAITCNT_FENCE_soft .release, .system, .global.scratch + ; CHECK-NEXT: S_WAITCNT_FENCE_soft .acquire, .system, .atomic + S_WAITCNT_FENCE_soft .release, .workgroup, .global.lds.scratch + S_WAITCNT_FENCE_soft .monotonic, .system, .none + S_WAITCNT_FENCE_soft .acquire, .none, .none + S_WAITCNT_FENCE_soft .release, .none, .scratch.global + S_WAITCNT_FENCE_soft .acquire, .agent, .lds + S_WAITCNT_FENCE_soft .not_atomic, .wavefront, .atomic + S_WAITCNT_FENCE_soft 5, 5, 5 + S_WAITCNT_FENCE_soft 4, 5, 15 +... diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir index 4b129b22e87bf..fcb3aef82107e 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir @@ -16,7 +16,7 @@ body: | ; GCN-NEXT: S_ENDPGM 0 $m0 = S_MOV_B32 0 BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) - S_WAITCNT_FENCE_soft 5, 3, 15 + S_WAITCNT_FENCE_soft .release, .workgroup, .global.lds.scratch.gds $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec S_ENDPGM 0 @@ -39,7 +39,7 @@ body: | $m0 = S_MOV_B32 0 BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) $vgpr2 = 
GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec - S_WAITCNT_FENCE_soft 5, 3, 15 + S_WAITCNT_FENCE_soft .release, .workgroup, .global.lds.scratch.gds $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec S_ENDPGM 0 @@ -57,7 +57,7 @@ body: | ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0 $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec - S_WAITCNT_FENCE_soft 5, 3, 15 + S_WAITCNT_FENCE_soft .release, .workgroup, .global.lds.scratch.gds $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec S_ENDPGM 0 @@ -78,7 +78,7 @@ body: | ; GCN-NEXT: S_ENDPGM 0 BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec - S_WAITCNT_FENCE_soft 5, 5, 15 + S_WAITCNT_FENCE_soft .release, .system, .global.lds.scratch.gds $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec S_ENDPGM 0 @@ -98,7 +98,7 @@ body: | ; GCN-NEXT: S_ENDPGM 0 BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec - S_WAITCNT_FENCE_soft 5, 5, 5 + S_WAITCNT_FENCE_soft .release, .system, .global.scratch $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec S_ENDPGM 0 @@ -118,7 +118,7 @@ body: | ; GCN-NEXT: S_ENDPGM 0 BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec - S_WAITCNT_FENCE_soft 4, 5, 15 + S_WAITCNT_FENCE_soft .acquire, .system, 
.atomic $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec S_ENDPGM 0 @@ -138,7 +138,7 @@ body: | ; GCN-NEXT: S_ENDPGM 0 BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec - S_WAITCNT_FENCE_soft 5, 2, 15 + S_WAITCNT_FENCE_soft .release, .wavefront, .global.lds.scratch.gds $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec S_ENDPGM 0 @@ -162,7 +162,7 @@ body: | BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec S_WAITCNT 3952 - S_WAITCNT_FENCE_soft 5, 3, 15 + S_WAITCNT_FENCE_soft .release, .workgroup, .global.lds.scratch.gds $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec S_ENDPGM 0 @@ -185,7 +185,7 @@ body: | $m0 = S_MOV_B32 0 BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec - S_WAITCNT_FENCE_soft 5, 3, 15 + S_WAITCNT_FENCE_soft .release, .workgroup, .global.lds.scratch.gds S_WAITCNT 3952 $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir index 7a8e00acede74..c004e099df24d 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir @@ -321,9 +321,9 @@ body: | ; GCN-NEXT: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: 
(dereferenceable invariant load (s64) from `ptr addrspace(4) poison`, align 4, addrspace 4) ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_FENCE_soft 7, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .seq_cst, .workgroup, .lds ; GCN-NEXT: renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst (s32) from `ptr addrspace(3) poison`, addrspace 3) - ; GCN-NEXT: S_WAITCNT_FENCE_soft 7, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .seq_cst, .workgroup, .lds ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec ; GCN-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr poison`) @@ -435,9 +435,9 @@ body: | ; GCN-NEXT: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) poison`, align 4, addrspace 4) ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_FENCE_soft 7, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .seq_cst, .workgroup, .lds ; GCN-NEXT: renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst (s32) from `ptr addrspace(3) poison`, addrspace 3) - ; GCN-NEXT: S_WAITCNT_FENCE_soft 7, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .seq_cst, .workgroup, .lds ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec ; GCN-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed 
renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr poison`) @@ -549,9 +549,9 @@ body: | ; GCN-NEXT: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) poison`, align 4, addrspace 4) ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_FENCE_soft 7, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .seq_cst, .workgroup, .lds ; GCN-NEXT: renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst (s32) from `ptr addrspace(3) poison`, addrspace 3) - ; GCN-NEXT: S_WAITCNT_FENCE_soft 7, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .seq_cst, .workgroup, .lds ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec ; GCN-NEXT: FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr poison`) @@ -820,7 +820,7 @@ body: | ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_FENCE_soft 5, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .lds ; GCN-NEXT: DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release (s32) into `ptr addrspace(3) poison`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4) @@ -845,7 +845,7 @@ body: | ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr2, 
implicit $exec, implicit $exec ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_FENCE_soft 5, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .lds ; GCN-NEXT: DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst (s32) into `ptr addrspace(3) poison`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4) @@ -918,7 +918,7 @@ body: | ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_FENCE_soft 5, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .lds ; GCN-NEXT: DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release (s32) into `ptr addrspace(3) poison`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4) @@ -943,7 +943,7 @@ body: | ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_FENCE_soft 5, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .lds ; GCN-NEXT: DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst (s32) into `ptr addrspace(3) poison`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4) @@ -1016,7 +1016,7 
@@ body: | ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_FENCE_soft 5, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .lds ; GCN-NEXT: DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release (s32) into `ptr addrspace(3) poison`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4) @@ -1041,7 +1041,7 @@ body: | ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_FENCE_soft 5, 3, 2 + ; GCN-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .lds ; GCN-NEXT: DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst (s32) into `ptr addrspace(3) poison`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits