llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)

<details>
<summary>Changes</summary>

---
Full diff: https://github.com/llvm/llvm-project/pull/112116.diff

2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+14-1)
- (added) llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir (+22)

``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 9866ecbdddb608..28e26dc47b0ab4 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1600,6 +1600,17 @@ static bool callWaitsOnFunctionReturn(const MachineInstr &MI) {
   return true;
 }
 
+/// \returns true if \p MI is not the first terminator of its associated MBB.
+static bool checkIfMBBNonFirstTerminator(const MachineInstr &MI) {
+  const auto &MBB = MI.getParent();
+  if (MBB->getFirstTerminator() == MI)
+    return false;
+  for (const auto &I : MBB->terminators())
+    if (&I == &MI)
+      return true;
+  return false;
+}
+
 /// Generate s_waitcnt instruction to be placed before cur_Inst.
 /// Instructions of a given type are returned in order,
 /// but instructions of different types can complete out of order.
@@ -1825,7 +1836,9 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   // Verify that the wait is actually needed.
   ScoreBrackets.simplifyWaitcnt(Wait);
 
-  if (ForceEmitZeroFlag)
+  // When forcing emit, we need to skip non-first terminators of a MBB because
+  // that would break the terminators of the MBB.
+  if (ForceEmitZeroFlag && !checkIfMBBNonFirstTerminator(MI))
     Wait = WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false);
 
   if (ForceEmitWaitcnt[LOAD_CNT])
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir
new file mode 100644
index 00000000000000..530d1981f053e9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir
@@ -0,0 +1,22 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcezero=1 %s -o - | FileCheck %s
+
+...
+
+# CHECK-LABEL: waitcnt-debug-non-first-terminators
+# CHECK: S_WAITCNT 0
+# CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+# CHECK-NEXT: S_BRANCH %bb.2, implicit $scc
+
+name: waitcnt-debug-non-first-terminators
+liveins:
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  bb.0:
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+    S_BRANCH %bb.2, implicit $scc
+  bb.1:
+    S_NOP 0
+  bb.2:
+    S_NOP 0
+...
``````````

</details>

https://github.com/llvm/llvm-project/pull/112116
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits