gandhi21299 updated this revision to Diff 366294.
gandhi21299 added a comment.
- added clang/test/CodeGenCUDA/fp-atomics-optremarks.cu back
- moved `Remark` declaration into the `else` block
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D106891/new/
https://reviews.llvm.org/D106891
Files:
clang/test/CodeGenCUDA/fp-atomics-optremarks.cu
clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/AtomicExpandPass.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.h
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/AMDGPU/fp-atomics-remarks-gfx90a.ll
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
llvm/test/CodeGen/X86/O0-pipeline.ll
llvm/test/CodeGen/X86/opt-pipeline.ll
Index: llvm/test/CodeGen/X86/opt-pipeline.ll
===================================================================
--- llvm/test/CodeGen/X86/opt-pipeline.ll
+++ llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -16,15 +16,20 @@
; CHECK-NEXT: Target Pass Configuration
; CHECK-NEXT: Machine Module Information
; CHECK-NEXT: Target Transform Information
+; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Type-Based Alias Analysis
; CHECK-NEXT: Scoped NoAlias Alias Analysis
; CHECK-NEXT: Assumption Cache Tracker
-; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: Dominator Tree Construction
+; CHECK-NEXT: Natural Loop Information
+; CHECK-NEXT: Lazy Branch Probability Analysis
+; CHECK-NEXT: Lazy Block Frequency Analysis
+; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: Lower AMX intrinsics
; CHECK-NEXT: Lower AMX type for load/store
Index: llvm/test/CodeGen/X86/O0-pipeline.ll
===================================================================
--- llvm/test/CodeGen/X86/O0-pipeline.ll
+++ llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -10,13 +10,18 @@
; CHECK-NEXT: Target Pass Configuration
; CHECK-NEXT: Machine Module Information
; CHECK-NEXT: Target Transform Information
+; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: Assumption Cache Tracker
-; CHECK-NEXT: Profile summary info
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: Dominator Tree Construction
+; CHECK-NEXT: Natural Loop Information
+; CHECK-NEXT: Lazy Branch Probability Analysis
+; CHECK-NEXT: Lazy Block Frequency Analysis
+; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: Lower AMX intrinsics
; CHECK-NEXT: Lower AMX type for load/store
Index: llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -44,6 +44,11 @@
; GCN-O0-NEXT: Lower OpenCL enqueued blocks
; GCN-O0-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O0-NEXT: FunctionPass Manager
+; GCN-O0-NEXT: Dominator Tree Construction
+; GCN-O0-NEXT: Natural Loop Information
+; GCN-O0-NEXT: Lazy Branch Probability Analysis
+; GCN-O0-NEXT: Lazy Block Frequency Analysis
+; GCN-O0-NEXT: Optimization Remark Emitter
; GCN-O0-NEXT: Expand Atomic instructions
; GCN-O0-NEXT: Lower constant intrinsics
; GCN-O0-NEXT: Remove unreachable blocks from the CFG
@@ -180,6 +185,11 @@
; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Infer address spaces
+; GCN-O1-NEXT: Dominator Tree Construction
+; GCN-O1-NEXT: Natural Loop Information
+; GCN-O1-NEXT: Lazy Branch Probability Analysis
+; GCN-O1-NEXT: Lazy Block Frequency Analysis
+; GCN-O1-NEXT: Optimization Remark Emitter
; GCN-O1-NEXT: Expand Atomic instructions
; GCN-O1-NEXT: AMDGPU Promote Alloca
; GCN-O1-NEXT: Dominator Tree Construction
@@ -431,6 +441,11 @@
; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Infer address spaces
+; GCN-O1-OPTS-NEXT: Dominator Tree Construction
+; GCN-O1-OPTS-NEXT: Natural Loop Information
+; GCN-O1-OPTS-NEXT: Lazy Branch Probability Analysis
+; GCN-O1-OPTS-NEXT: Lazy Block Frequency Analysis
+; GCN-O1-OPTS-NEXT: Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: Expand Atomic instructions
; GCN-O1-OPTS-NEXT: AMDGPU Promote Alloca
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
@@ -715,6 +730,11 @@
; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Infer address spaces
+; GCN-O2-NEXT: Dominator Tree Construction
+; GCN-O2-NEXT: Natural Loop Information
+; GCN-O2-NEXT: Lazy Branch Probability Analysis
+; GCN-O2-NEXT: Lazy Block Frequency Analysis
+; GCN-O2-NEXT: Optimization Remark Emitter
; GCN-O2-NEXT: Expand Atomic instructions
; GCN-O2-NEXT: AMDGPU Promote Alloca
; GCN-O2-NEXT: Dominator Tree Construction
@@ -1001,6 +1021,11 @@
; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Infer address spaces
+; GCN-O3-NEXT: Dominator Tree Construction
+; GCN-O3-NEXT: Natural Loop Information
+; GCN-O3-NEXT: Lazy Branch Probability Analysis
+; GCN-O3-NEXT: Lazy Block Frequency Analysis
+; GCN-O3-NEXT: Optimization Remark Emitter
; GCN-O3-NEXT: Expand Atomic instructions
; GCN-O3-NEXT: AMDGPU Promote Alloca
; GCN-O3-NEXT: Dominator Tree Construction
Index: llvm/test/CodeGen/AMDGPU/fp-atomics-remarks-gfx90a.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/fp-atomics-remarks-gfx90a.ll
@@ -0,0 +1,103 @@
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs --pass-remarks=atomic-expand \
+; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=GFX90A-CAS
+
+; GFX90A-CAS: A compare and swap loop was generated for an fadd operation at system memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an fadd operation at agent memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an fadd operation at workgroup memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an fadd operation at wavefront memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an fadd operation at singlethread memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an fadd operation at one-as memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an fadd operation at agent-one-as memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an fadd operation at workgroup-one-as memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an fadd operation at wavefront-one-as memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an fadd operation at singlethread-one-as memory scope
+
+; GFX90A-CAS-LABEL: atomic_add_cas:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_agent:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_agent(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("agent") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_workgroup:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_workgroup(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("workgroup") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_wavefront:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_wavefront(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("wavefront") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_singlethread:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_singlethread(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("singlethread") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_one_as:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_one_as(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("one-as") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_agent_one_as:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_agent_one_as(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("agent-one-as") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_workgroup_one_as:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_workgroup_one_as(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("workgroup-one-as") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_wavefront_one_as:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_wavefront_one_as(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("wavefront-one-as") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_singlethread_one_as:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_singlethread_one_as(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("singlethread-one-as") monotonic, align 4
+ ret void
+}
Index: llvm/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.h
+++ llvm/lib/Target/X86/X86ISelLowering.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -1591,7 +1592,8 @@
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
- shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI,
+ OptimizationRemarkEmitter *ORE) const override;
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
@@ -29361,7 +29362,8 @@
}
TargetLowering::AtomicExpansionKind
-X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+X86TargetLowering::shouldExpandAtomicRMWInIR(
+ AtomicRMWInst *AI, OptimizationRemarkEmitter *ORE) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Type *MemType = AI->getType();
Index: llvm/lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -452,7 +452,9 @@
const SelectionDAG &DAG,
bool SNaN = false,
unsigned Depth = 0) const override;
- AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+ AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *,
+ OptimizationRemarkEmitter *ORE) const override;
virtual const TargetRegisterClass *
getRegClassFor(MVT VT, bool isDivergent) const override;
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15,10 +15,12 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -12118,8 +12120,8 @@
return DenormMode == DenormalMode::getIEEE();
}
-TargetLowering::AtomicExpansionKind
-SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+TargetLowering::AtomicExpansionKind SITargetLowering::shouldExpandAtomicRMWInIR(
+ AtomicRMWInst *RMW, OptimizationRemarkEmitter *ORE) const {
switch (RMW->getOperation()) {
case AtomicRMWInst::FAdd: {
Type *Ty = RMW->getType();
@@ -12185,7 +12187,7 @@
break;
}
- return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
+ return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW, ORE);
}
const TargetRegisterClass *
Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -326,7 +327,9 @@
return MVT::i32;
}
- AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+ AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *,
+ OptimizationRemarkEmitter *ORE) const override;
bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
LLT Ty2) const override;
Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -18,6 +18,7 @@
#include "AMDGPUMachineFunction.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -4845,7 +4846,8 @@
}
TargetLowering::AtomicExpansionKind
-AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(
+ AtomicRMWInst *RMW, OptimizationRemarkEmitter *ORE) const {
switch (RMW->getOperation()) {
case AtomicRMWInst::Nand:
case AtomicRMWInst::FAdd:
Index: llvm/lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -17,6 +17,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -58,6 +60,7 @@
class AtomicExpand: public FunctionPass {
const TargetLowering *TLI = nullptr;
+ OptimizationRemarkEmitter *ORE;
public:
static char ID; // Pass identification, replacement for typeid
@@ -69,6 +72,7 @@
bool runOnFunction(Function &F) override;
private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
@@ -165,11 +169,16 @@
Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
+void AtomicExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+}
+
bool AtomicExpand::runOnFunction(Function &F) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
return false;
+ ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto &TM = TPC->getTM<TargetMachine>();
if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
return false;
@@ -570,7 +579,10 @@
}
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
- switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ LLVMContext &Ctx = AI->getModule()->getContext();
+ TargetLowering::AtomicExpansionKind Kind =
+ TLI->shouldExpandAtomicRMWInIR(AI, ORE);
+ switch (Kind) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
case TargetLoweringBase::AtomicExpansionKind::LLSC: {
@@ -600,6 +612,18 @@
expandPartwordAtomicRMW(AI,
TargetLoweringBase::AtomicExpansionKind::CmpXChg);
} else {
+ SmallVector<StringRef> SSNs;
+ Ctx.getSyncScopeNames(SSNs);
+ auto MemScope = SSNs[AI->getSyncScopeID()].empty()
+ ? "system"
+ : SSNs[AI->getSyncScopeID()];
+ OptimizationRemark Remark(DEBUG_TYPE, "Passed", AI->getFunction());
+ ORE->emit([&]() {
+ Remark << "A compare and swap loop was generated for an "
+ << AI->getOperationName(AI->getOperation()) << " operation at "
+ << MemScope << " memory scope";
+ return Remark;
+ });
expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
}
return true;
Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -28,6 +28,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -2002,7 +2003,9 @@
/// Returns how the IR-level AtomicExpand pass should expand the given
/// AtomicRMW, if at all. Default is to never expand.
- virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+ virtual AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW,
+ OptimizationRemarkEmitter *ORE) const {
return RMW->isFloatingPointOperation() ?
AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
}
Index: clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \
+// RUN: -Rpass=atomic-expand -S -o - 2>&1 | \
+// RUN: FileCheck %s --check-prefix=REMARK
+
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \
+// RUN: -Rpass=atomic-expand -S -emit-llvm -o - 2>&1 | \
+// RUN: FileCheck %s --check-prefix=GFX90A-CAS
+
+// REQUIRES: amdgpu-registered-target
+
+typedef enum memory_order {
+ memory_order_relaxed = __ATOMIC_RELAXED,
+ memory_order_acquire = __ATOMIC_ACQUIRE,
+ memory_order_release = __ATOMIC_RELEASE,
+ memory_order_acq_rel = __ATOMIC_ACQ_REL,
+ memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+typedef enum memory_scope {
+ memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+ memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+ memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+ memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+ memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
+#endif
+} memory_scope;
+
+// REMARK: remark: A compare and swap loop was generated for an fadd operation at workgroup-one-as memory scope [-Rpass=atomic-expand]
+// GFX90A-CAS-LABEL: @atomic_cas_system
+// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
+float atomic_cas_system(__global atomic_float *d, float a) {
+ return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
+}
Index: clang/test/CodeGenCUDA/fp-atomics-optremarks.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/fp-atomics-optremarks.cu
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -target-cpu gfx90a -Rpass=atomic-expand -S -o - 2>&1 | \
+// RUN: FileCheck %s --check-prefix=GFX90A-CAS
+
+// REQUIRES: amdgpu-registered-target
+
+#include "Inputs/cuda.h"
+#include <stdatomic.h>
+
+// GFX90A-CAS: A compare and swap loop was generated for an fadd operation at system memory scope
+// GFX90A-CAS-LABEL: _Z14atomic_add_casPf
+// GFX90A-CAS: flat_atomic_cmpswap v0, v[2:3], v[4:5] glc
+// GFX90A-CAS: s_cbranch_execnz
+__device__ float atomic_add_cas(float *p) {
+ return __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
+}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits