================ @@ -2326,6 +2326,20 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, } #endif + if (ST->isPreciseMemoryEnabled() && Inst.mayLoadOrStore()) { + AMDGPU::Waitcnt Wait; + if (ST->hasExtendedWaitCounts()) + Wait = AMDGPU::Waitcnt(0, 0, 0, 0, 0, 0, 0); + else + Wait = AMDGPU::Waitcnt(0, 0, 0, 0); + + if (!Inst.mayStore()) + Wait.StoreCnt = ~0u; ---------------- jayfoad wrote:
```suggestion
AMDGPU::Waitcnt Wait = WCG->getAllZeroWaitcnt(Inst.mayStore());
```

However, as a general rule:
- loads and atomics-with-return update LOADcnt
- stores and atomics-without-return update STOREcnt

so it might be more accurate to use the condition `Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst)`. Please make sure you have tests for atomics with and without return.

https://github.com/llvm/llvm-project/pull/79236
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits