llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Sameer Sahasrabuddhe (ssahasra) <details> <summary>Changes</summary> --- Patch is 34.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150391.diff 7 Files Affected: - (modified) llvm/lib/CodeGen/MIRParser/MIParser.cpp (+10-15) - (modified) llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp (+161) - (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+6-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll (+36-36) - (added) llvm/test/CodeGen/AMDGPU/fence-parameters.mir (+29) - (modified) llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir (+9-9) - (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir (+12-12) ``````````diff diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 3a364d5ff0d20..c8ad286a87a35 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1850,28 +1850,25 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) { return false; } +// The target mnemonic is an expression of the form: +// +// Dot(IntegerLiteral|Identifier|Dot)+ +// +// We could be stricter like not terminating in a dot, but that's not important +// where this is being used. bool MIParser::parseTargetImmMnemonic(const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest, const MIRFormatter &MF) { assert(Token.is(MIToken::dot)); auto Loc = Token.location(); // record start position - size_t Len = 1; // for "." - lex(); - - // Handle the case that mnemonic starts with number. 
- if (Token.is(MIToken::IntegerLiteral)) { + size_t Len = 0; + while (Token.is(MIToken::IntegerLiteral) || Token.is(MIToken::dot) || + Token.is(MIToken::Identifier)) { Len += Token.range().size(); lex(); } - - StringRef Src; - if (Token.is(MIToken::comma)) - Src = StringRef(Loc, Len); - else { - assert(Token.is(MIToken::Identifier)); - Src = StringRef(Loc, Len + Token.stringValue().size()); - } + StringRef Src(Loc, Len); int64_t Val; if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val, [this](StringRef::iterator Loc, const Twine &Msg) @@ -1879,8 +1876,6 @@ bool MIParser::parseTargetImmMnemonic(const unsigned OpCode, return true; Dest = MachineOperand::CreateImm(Val); - if (!Token.is(MIToken::comma)) - lex(); return false; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp index 75e3d8c426e73..f318d6ffc1bae 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp @@ -12,10 +12,135 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMIRFormatter.h" +#include "SIDefines.h" #include "SIMachineFunctionInfo.h" using namespace llvm; +bool parseAtomicOrdering(StringRef Src, unsigned &Order) { + Src.consume_front("."); + for (unsigned I = 0; I <= (unsigned)AtomicOrdering::LAST; ++I) { + if (Src == toIRString((AtomicOrdering)I)) { + Order = I; + return true; + } + } + Order = ~0u; + return false; +} + +static const char *fmtScope(unsigned Scope) { + static const char *Names[] = {"none", "singlethread", "wavefront", + "workgroup", "agent", "system"}; + return Names[Scope]; +} + +bool parseAtomicScope(StringRef Src, unsigned &Scope) { + Src.consume_front("."); + for (unsigned I = 0; + I != (unsigned)AMDGPU::SIAtomicScope::NUM_SI_ATOMIC_SCOPES; ++I) { + if (Src == fmtScope(I)) { + Scope = I; + return true; + } + } + Scope = ~0u; + return false; +} + +static const char *fmtAddrSpace(unsigned Space) { + static const char 
*Names[] = {"none", "global", "lds", + "scratch", "gds", "other"}; + return Names[Space]; +} + +bool parseOneAddrSpace(StringRef Src, unsigned &AddrSpace) { + if (Src == "none") { + AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::NONE; + return true; + } + if (Src == "flat") { + AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::FLAT; + return true; + } + if (Src == "atomic") { + AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::ATOMIC; + return true; + } + if (Src == "all") { + AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::ALL; + return true; + } + for (unsigned I = 1, A = 1; A <= (unsigned)AMDGPU::SIAtomicAddrSpace::LAST; + A <<= 1, ++I) { + if (Src == fmtAddrSpace(I)) { + AddrSpace = A; + return true; + } + } + AddrSpace = ~0u; + return false; +} + +bool parseAddrSpace(StringRef Src, unsigned &AddrSpace) { + Src = Src.trim(); + Src.consume_front("."); + while (!Src.empty()) { + auto [First, Rest] = Src.split('.'); + unsigned OneSpace; + if (!parseOneAddrSpace(First, OneSpace)) + return false; + AddrSpace |= OneSpace; + Src = Rest; + } + return true; +} + +static void fmtAddrSpace(raw_ostream &OS, int64_t Imm) { + OS << '.'; + if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::NONE) { + OS << "none"; + return; + } + if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::FLAT) { + OS << "flat"; + return; + } + if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::ATOMIC) { + OS << "atomic"; + return; + } + if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::ALL) { + OS << "all"; + return; + } + + ListSeparator LS{"."}; + auto AddrSpace = (AMDGPU::SIAtomicAddrSpace)Imm; + const auto LAST = (unsigned)AMDGPU::SIAtomicAddrSpace::LAST; + + for (unsigned A = 1, I = 1; A <= LAST; A <<= 1, ++I) { + if (any(AddrSpace & (AMDGPU::SIAtomicAddrSpace)A)) + OS << LS << StringRef(fmtAddrSpace(I)); + } +} + +static void printFenceOperand(raw_ostream &OS, const MachineInstr &MI, + std::optional<unsigned int> OpIdx, int64_t Imm) { +#define GET_IDX(Name) \ + 
AMDGPU::getNamedOperandIdx(AMDGPU::S_WAITCNT_FENCE_soft, AMDGPU::OpName::Name) + if (OpIdx == GET_IDX(Ordering)) { + assert(Imm <= (unsigned)AtomicOrdering::LAST); + OS << '.' << StringRef(toIRString((AtomicOrdering)Imm)); + } else if (OpIdx == GET_IDX(Scope)) { + assert(Imm < (unsigned)AMDGPU::SIAtomicScope::NUM_SI_ATOMIC_SCOPES); + OS << '.' << StringRef(fmtScope(Imm)); + } else if (OpIdx == GET_IDX(AddrSpace)) { + fmtAddrSpace(OS, Imm); + } +#undef GET_IDX +} + void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI, std::optional<unsigned int> OpIdx, int64_t Imm) const { @@ -24,12 +149,46 @@ void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI, assert(OpIdx == 0); printSDelayAluImm(Imm, OS); break; + case AMDGPU::S_WAITCNT_FENCE_soft: + printFenceOperand(OS, MI, OpIdx, Imm); + break; default: MIRFormatter::printImm(OS, MI, OpIdx, Imm); break; } } +static bool +parseFenceParameter(const unsigned int OpIdx, int64_t &Imm, + llvm::StringRef &Src, + llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) { +#define GET_IDX(Name) \ + AMDGPU::getNamedOperandIdx(AMDGPU::S_WAITCNT_FENCE_soft, AMDGPU::OpName::Name) + if (OpIdx == (unsigned)GET_IDX(Ordering)) { + unsigned Order = 0; + if (!parseAtomicOrdering(Src, Order)) + return ErrorCallback(Src.begin(), "Expected atomic ordering"); + Imm = Order; + return false; + } + if (OpIdx == (unsigned)GET_IDX(Scope)) { + unsigned Scope = 0; + if (!parseAtomicScope(Src, Scope)) + return ErrorCallback(Src.begin(), "Expected atomic scope"); + Imm = Scope; + return false; + } + if (OpIdx == (unsigned)GET_IDX(AddrSpace)) { + unsigned AddrSpace = 0; + if (!parseAddrSpace(Src, AddrSpace)) + return ErrorCallback(Src.begin(), "Expected address space"); + Imm = AddrSpace; + return false; + } + return true; +#undef GET_IDX +} + /// Implement target specific parsing of immediate mnemonics. The mnemonic is /// a string with a leading dot. 
bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode, @@ -41,6 +200,8 @@ bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode, switch (OpCode) { case AMDGPU::S_DELAY_ALU: return parseSDelayAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback); + case AMDGPU::S_WAITCNT_FENCE_soft: + return parseFenceParameter(OpIdx, Imm, Src, ErrorCallback); default: break; } diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 9d30951cac1a3..d7c2aff1d3411 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -421,13 +421,16 @@ enum CPol { } // namespace CPol /// The atomic synchronization scopes supported by the AMDGPU target. +// +// Note: Update the strings in AMDGPUMIRFormatter.cpp to match this enum. enum class SIAtomicScope { NONE, SINGLETHREAD, WAVEFRONT, WORKGROUP, AGENT, - SYSTEM + SYSTEM, + NUM_SI_ATOMIC_SCOPES }; /// The distinct address spaces supported by the AMDGPU target for @@ -439,6 +442,7 @@ enum class SIAtomicAddrSpace { SCRATCH = 1u << 2, GDS = 1u << 3, OTHER = 1u << 4, + LAST = OTHER, /// The address spaces that can be accessed by a FLAT instruction. FLAT = GLOBAL | LDS | SCRATCH, @@ -449,7 +453,7 @@ enum class SIAtomicAddrSpace { /// All address spaces. ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER, - LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL) + LLVM_MARK_AS_BITMASK_ENUM(/* Highest bit defined = */ LAST) }; namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns. 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll index 1f01c64de546c..6a14c2c9aae7f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll @@ -536,36 +536,36 @@ entry: define amdgpu_kernel void @workgroup_one_as_release() #0 { ; GFX6-LABEL: name: workgroup_one_as_release ; GFX6: bb.0.entry: - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_release ; GFX8: bb.0.entry: - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_release ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: workgroup_one_as_release ; GFX10CU: bb.0.entry: - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_release ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_one_as_release ; GFX11CU: bb.0.entry: - ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") release @@ -575,38 +575,38 @@ entry: define 
amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; GFX6-LABEL: name: workgroup_one_as_acq_rel ; GFX6: bb.0.entry: - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_acq_rel ; GFX8: bb.0.entry: - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_acq_rel ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: workgroup_one_as_acq_rel ; GFX10CU: bb.0.entry: - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_acq_rel ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_one_as_acq_rel ; GFX11CU: bb.0.entry: - ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") acq_rel @@ -616,38 +616,38 @@ entry: define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; GFX6-LABEL: name: workgroup_one_as_seq_cst ; GFX6: bb.0.entry: - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: 
workgroup_one_as_seq_cst ; GFX8: bb.0.entry: - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_seq_cst ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: workgroup_one_as_seq_cst ; GFX10CU: bb.0.entry: - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_seq_cst ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_one_as_seq_cst ; GFX11CU: bb.0.entry: - ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") seq_cst @@ -1301,39 +1301,39 @@ define amdgpu_kernel void @workgroup_release() #0 { ; GFX6-LABEL: name: workgroup_release ; GFX6: bb.0.entry: ; GFX6-NEXT: S_WAITCNT_soft 127 - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_release ; GFX8: bb.0.entry: ; GFX8-NEXT: S_WAITCNT_soft 127 - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_release ; GFX10WGP: bb.0.entry: ; 
GFX10WGP-NEXT: S_WAITCNT_soft 112 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: workgroup_release ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 49279 - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_release ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_release ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 64519 - ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup") release @@ -1344,19 +1344,19 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; GFX6-LABEL: name: workgroup_acq_rel ; GFX6: bb.0.entry: ; GFX6-NEXT: S_WAITCNT_soft 127 - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_acq_rel ; GFX8: bb.0.entry: ; GFX8-NEXT: S_WAITCNT_soft 127 - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_acq_rel ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 112 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 @@ -1364,13 +1364,13 @@ define 
amdgpu_kernel void @workgroup_acq_rel() #0 { ; GFX10CU-LABEL: name: workgroup_acq_rel ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 49279 - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_acq_rel ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 @@ -1378,7 +1378,7 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; GFX11CU-LABEL: name: workgroup_acq_rel ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 64519 - ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup") acq_rel @@ -1389,19 +1389,19 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; GFX6-LABEL: name: workgroup_seq_cst ; GFX6: bb.0.entry: ; GFX6-NEXT: S_WAITCNT_soft 127 - ; GFX6-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX6-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_seq_cst ; GFX8: bb.0.entry: ; GFX8-NEXT: S_WAITCNT_soft 127 - ; GFX8-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX8-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_seq_cst ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 112 - ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 @@ -1409,13 +1409,13 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; GFX10CU-LABEL: name: 
workgroup_seq_cst ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 49279 - ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX10CU-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_seq_cst ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 - ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft 5, 3, 15 + ; GFX11WGP-NEXT: S_WAITCNT_FENCE_soft .release, .workgroup, .atomic ; GFX11WGP-NEXT: S_WAITCN... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/150391 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits