https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/96906
Use this to replace the emission of the amdgpu-unsafe-fp-atomics attribute in favor of per-instruction metadata. In the future, new fine-grained controls should be introduced that also cover the integer cases. Add a wrapper around CreateAtomicRMW that appends the metadata, and update a few use contexts to use it.

From 350579354c8e57c815e4f2f28be9413a1f0a1176 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Thu, 27 Jun 2024 12:04:52 +0200
Subject: [PATCH] clang: Allow targets to set custom metadata on atomics

Use this to replace the emission of the amdgpu-unsafe-fp-atomics
attribute in favor of per-instruction metadata. In the future, new
fine-grained controls should be introduced that also cover the
integer cases.

Add a wrapper around CreateAtomicRMW that appends the metadata, and
update a few use contexts to use it.
---
 clang/lib/CodeGen/CGAtomic.cpp                |  13 +-
 clang/lib/CodeGen/CGExprScalar.cpp            |  13 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |   4 +-
 clang/lib/CodeGen/CodeGenFunction.h           |   7 +
 clang/lib/CodeGen/TargetInfo.h                |   4 +
 clang/lib/CodeGen/Targets/AMDGPU.cpp          |  19 ++
 .../test/CodeGen/AMDGPU/amdgpu-atomic-float.c | 316 ++++++++++++++++++
 clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu   | 102 ++++--
 .../test/OpenMP/amdgpu-unsafe-fp-atomics.cpp  |  59 ++++
 9 files changed, 505 insertions(+), 32 deletions(-)
 create mode 100644 clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
 create mode 100644 clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp

diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index fbf942d06ca6e..fbe9569e50ef6 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -727,7 +727,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
 
   llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
   llvm::AtomicRMWInst *RMWI =
-      CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order, Scope);
+      CGF.emitAtomicRMWInst(Op, Ptr, LoadVal1, Order, Scope);
   RMWI->setVolatile(E->isVolatile());
 
   // For __atomic_*_fetch operations, perform the operation again to
@@ -2034,6 +2034,17 @@ std::pair<RValue, llvm::Value *> CodeGenFunction::EmitAtomicCompareExchange(
       IsWeak);
 }
 
+llvm::AtomicRMWInst *
+CodeGenFunction::emitAtomicRMWInst(llvm::AtomicRMWInst::BinOp Op, Address Addr,
+                                   llvm::Value *Val, llvm::AtomicOrdering Order,
+                                   llvm::SyncScope::ID SSID) {
+
+  llvm::AtomicRMWInst *RMW =
+      Builder.CreateAtomicRMW(Op, Addr, Val, Order, SSID);
+  getTargetHooks().setTargetAtomicMetadata(*this, *RMW);
+  return RMW;
+}
+
 void CodeGenFunction::EmitAtomicUpdate(
     LValue LVal, llvm::AtomicOrdering AO,
     const llvm::function_ref<RValue(RValue)> &UpdateOp, bool IsVolatile) {
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index f40f3c273206b..8eb7a64c144c8 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2835,9 +2835,10 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
           isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
       llvm::Value *amt = llvm::ConstantFP::get(
           VMContext, llvm::APFloat(static_cast<float>(1.0)));
-      llvm::Value *old =
-          Builder.CreateAtomicRMW(aop, LV.getAddress(), amt,
-                                  llvm::AtomicOrdering::SequentiallyConsistent);
+      llvm::AtomicRMWInst *old =
+          CGF.emitAtomicRMWInst(aop, LV.getAddress(), amt,
+                                llvm::AtomicOrdering::SequentiallyConsistent);
+
       return isPre ? Builder.CreateBinOp(op, old, amt) : old;
     }
     value = EmitLoadOfLValue(LV, E->getExprLoc());
@@ -3577,9 +3578,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
             EmitScalarConversion(OpInfo.RHS, E->getRHS()->getType(), LHSTy,
                                  E->getExprLoc()),
             LHSTy);
-      Value *OldVal = Builder.CreateAtomicRMW(
-          AtomicOp, LHSLV.getAddress(), Amt,
-          llvm::AtomicOrdering::SequentiallyConsistent);
+
+      llvm::AtomicRMWInst *OldVal =
+          CGF.emitAtomicRMWInst(AtomicOp, LHSLV.getAddress(), Amt);
 
       // Since operation is atomic, the result type is guaranteed to be the
       // same as the input in LLVM terms.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f73d32de7c484..8c152fef73557 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -6235,8 +6235,8 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
     UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
                                        X.getAddress().getElementType());
   }
-  llvm::Value *Res =
-      CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
+  llvm::AtomicRMWInst *Res =
+      CGF.emitAtomicRMWInst(RMWOp, X.getAddress(), UpdateVal, AO);
   return std::make_pair(true, RValue::get(Res));
 }
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 13f12b5d878a6..6cfcb76eea42a 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4153,6 +4153,13 @@ class CodeGenFunction : public CodeGenTypeCache {
                                  llvm::AtomicOrdering::SequentiallyConsistent,
       bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored());
 
+  /// Emit an atomicrmw instruction, applying relevant metadata when
+  /// applicable.
+  llvm::AtomicRMWInst *emitAtomicRMWInst(
+      llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val,
+      llvm::AtomicOrdering Order = llvm::AtomicOrdering::SequentiallyConsistent,
+      llvm::SyncScope::ID SSID = llvm::SyncScope::System);
+
   void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO,
                         const llvm::function_ref<RValue(RValue)> &UpdateOp,
                         bool IsVolatile);
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index f242d9e36ed40..1bd821e7206b9 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -333,6 +333,10 @@ class TargetCodeGenInfo {
                                       llvm::AtomicOrdering Ordering,
                                       llvm::LLVMContext &Ctx) const;
 
+  /// Allow the target to apply other metadata to an atomic instruction
+  virtual void setTargetAtomicMetadata(CodeGenFunction &CGF,
+                                       llvm::AtomicRMWInst &RMW) const {}
+
   /// Interface class for filling custom fields of a block literal for OpenCL.
 class TargetOpenCLBlockHelper {
 public:
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4d3275e17c386..37e6af3d4196a 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -311,6 +311,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
                                   SyncScope Scope,
                                   llvm::AtomicOrdering Ordering,
                                   llvm::LLVMContext &Ctx) const override;
+  void setTargetAtomicMetadata(CodeGenFunction &CGF,
+                               llvm::AtomicRMWInst &RMW) const override;
   llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
                                          llvm::Function *BlockInvokeFunc,
                                          llvm::Type *BlockTy) const override;
@@ -546,6 +548,23 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
   return Ctx.getOrInsertSyncScopeID(Name);
 }
 
+void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
+    CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {
+  if (!CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
+    return;
+
+  // TODO: Introduce new, more controlled options that also work for integers,
+  // and deprecate allowAMDGPUUnsafeFPAtomics.
+  llvm::AtomicRMWInst::BinOp RMWOp = RMW.getOperation();
+  if (llvm::AtomicRMWInst::isFPOperation(RMWOp)) {
+    llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
+    RMW.setMetadata("amdgpu.no.fine.grained.memory", Empty);
+
+    if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW.getType()->isFloatTy())
+      RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
+  }
+}
+
 bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
   return false;
 }
diff --git a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
new file mode 100644
index 0000000000000..6deff1116e1d8
--- /dev/null
+++ b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
@@ -0,0 +1,316 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -fnative-half-arguments-and-returns -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,SAFE %s
+// RUN: %clang_cc1 -fnative-half-arguments-and-returns -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -munsafe-fp-atomics -o - %s | FileCheck -check-prefixes=CHECK,UNSAFE %s
+
+// SAFE-LABEL: define dso_local float @test_float_post_inc(
+// SAFE-SAME: ) #[[ATTR0:[0-9]+]] {
+// SAFE-NEXT:  [[ENTRY:.*:]]
+// SAFE-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT:    ret float [[TMP0]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_post_inc(
+// UNSAFE-SAME: ) #[[ATTR0:[0-9]+]] {
+// UNSAFE-NEXT:  [[ENTRY:.*:]]
+// UNSAFE-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT:    ret float [[TMP0]]
+//
+float test_float_post_inc()
+{
+  static _Atomic float n;
+  return n++;
+}
+
+// SAFE-LABEL: define dso_local float @test_float_post_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT:  [[ENTRY:.*:]]
+// SAFE-NEXT:    [[RETVAL:%.*]] = alloca float,
align 4, addrspace(5) +// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4 +// SAFE-NEXT: ret float [[TMP0]] +// +// UNSAFE-LABEL: define dso_local float @test_float_post_dc( +// UNSAFE-SAME: ) #[[ATTR0]] { +// UNSAFE-NEXT: [[ENTRY:.*:]] +// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]] +// UNSAFE-NEXT: ret float [[TMP0]] +// +float test_float_post_dc() +{ + static _Atomic float n; + return n--; +} + +// SAFE-LABEL: define dso_local float @test_float_pre_dc( +// SAFE-SAME: ) #[[ATTR0]] { +// SAFE-NEXT: [[ENTRY:.*:]] +// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4 +// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 +// SAFE-NEXT: ret float [[TMP1]] +// +// UNSAFE-LABEL: define dso_local float @test_float_pre_dc( +// UNSAFE-SAME: ) #[[ATTR0]] { +// UNSAFE-NEXT: [[ENTRY:.*:]] +// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]] +// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 +// UNSAFE-NEXT: ret float [[TMP1]] +// +float test_float_pre_dc() +{ + static _Atomic float n; + return --n; +} + +// SAFE-LABEL: define dso_local float @test_float_pre_inc( +// SAFE-SAME: ) #[[ATTR0]] { +// SAFE-NEXT: [[ENTRY:.*:]] +// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4 +// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 +// SAFE-NEXT: ret float [[TMP1]] +// +// UNSAFE-LABEL: define dso_local float @test_float_pre_inc( +// UNSAFE-SAME: ) #[[ATTR0]] { +// UNSAFE-NEXT: [[ENTRY:.*:]] +// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]] +// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 +// UNSAFE-NEXT: ret float [[TMP1]] +// +float test_float_pre_inc() +{ + static _Atomic float n; + return ++n; +} + +// SAFE-LABEL: define dso_local double @test_double_post_inc( +// SAFE-SAME: ) #[[ATTR0]] { +// SAFE-NEXT: [[ENTRY:.*:]] +// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr +// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8 +// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8 +// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// SAFE-NEXT: ret double [[TMP1]] +// +// UNSAFE-LABEL: define dso_local double @test_double_post_inc( +// UNSAFE-SAME: ) #[[ATTR0]] { +// UNSAFE-NEXT: [[ENTRY:.*:]] +// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]] +// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8 +// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// UNSAFE-NEXT: ret double [[TMP1]] +// +double test_double_post_inc() +{ + static _Atomic double n; + return n++; +} + +// SAFE-LABEL: define dso_local double @test_double_post_dc( +// SAFE-SAME: ) #[[ATTR0]] { +// SAFE-NEXT: [[ENTRY:.*:]] +// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8 +// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8 +// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// SAFE-NEXT: ret double [[TMP1]] +// +// UNSAFE-LABEL: define dso_local double @test_double_post_dc( +// UNSAFE-SAME: ) #[[ATTR0]] { +// UNSAFE-NEXT: [[ENTRY:.*:]] +// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]] +// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8 +// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// UNSAFE-NEXT: ret double [[TMP1]] +// +double test_double_post_dc() +{ + static _Atomic double n; + return n--; +} + +// SAFE-LABEL: define dso_local double @test_double_pre_dc( +// SAFE-SAME: ) #[[ATTR0]] { +// SAFE-NEXT: [[ENTRY:.*:]] +// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8 +// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 +// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8 +// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// SAFE-NEXT: ret double [[TMP2]] +// +// UNSAFE-LABEL: define dso_local double @test_double_pre_dc( +// UNSAFE-SAME: ) #[[ATTR0]] { +// UNSAFE-NEXT: [[ENTRY:.*:]] +// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) 
@test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]] +// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 +// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8 +// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// UNSAFE-NEXT: ret double [[TMP2]] +// +double test_double_pre_dc() +{ + static _Atomic double n; + return --n; +} + +// SAFE-LABEL: define dso_local double @test_double_pre_inc( +// SAFE-SAME: ) #[[ATTR0]] { +// SAFE-NEXT: [[ENTRY:.*:]] +// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8 +// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 +// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8 +// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// SAFE-NEXT: ret double [[TMP2]] +// +// UNSAFE-LABEL: define dso_local double @test_double_pre_inc( +// UNSAFE-SAME: ) #[[ATTR0]] { +// UNSAFE-NEXT: [[ENTRY:.*:]] +// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]] +// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 +// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8 +// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8 +// UNSAFE-NEXT: ret double [[TMP2]] +// +double test_double_pre_inc() +{ + static _Atomic double n; + return ++n; +} + +// SAFE-LABEL: define dso_local half @test__Float16_post_inc( +// SAFE-SAME: ) #[[ATTR0]] { +// SAFE-NEXT: [[ENTRY:.*:]] +// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2 +// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2 +// SAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// SAFE-NEXT: ret half [[TMP1]] +// +// UNSAFE-LABEL: define dso_local half @test__Float16_post_inc( +// UNSAFE-SAME: ) #[[ATTR0]] { +// UNSAFE-NEXT: [[ENTRY:.*:]] +// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]] +// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2 +// UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// UNSAFE-NEXT: ret half [[TMP1]] +// +_Float16 test__Float16_post_inc() +{ + static _Atomic _Float16 n; + return n++; +} + +// SAFE-LABEL: define dso_local half @test__Float16_post_dc( +// SAFE-SAME: ) #[[ATTR0]] { +// SAFE-NEXT: [[ENTRY:.*:]] +// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// 
SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 2 +// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2 +// SAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// SAFE-NEXT: ret half [[TMP1]] +// +// UNSAFE-LABEL: define dso_local half @test__Float16_post_dc( +// UNSAFE-SAME: ) #[[ATTR0]] { +// UNSAFE-NEXT: [[ENTRY:.*:]] +// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]] +// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2 +// UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// UNSAFE-NEXT: ret half [[TMP1]] +// +_Float16 test__Float16_post_dc() +{ + static _Atomic _Float16 n; + return n--; +} + +// SAFE-LABEL: define dso_local half @test__Float16_pre_dc( +// SAFE-SAME: ) #[[ATTR0]] { +// SAFE-NEXT: [[ENTRY:.*:]] +// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 2 +// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 +// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2 +// SAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// SAFE-NEXT: ret half [[TMP2]] +// +// UNSAFE-LABEL: define dso_local half @test__Float16_pre_dc( +// UNSAFE-SAME: ) #[[ATTR0]] { +// UNSAFE-NEXT: [[ENTRY:.*:]] +// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]] +// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 +// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2 +// UNSAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// UNSAFE-NEXT: ret half [[TMP2]] +// +_Float16 test__Float16_pre_dc() +{ + static _Atomic _Float16 n; + return --n; +} + +// SAFE-LABEL: define dso_local half @test__Float16_pre_inc( +// SAFE-SAME: ) #[[ATTR0]] { +// SAFE-NEXT: [[ENTRY:.*:]] +// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 2 +// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 +// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2 +// SAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// SAFE-NEXT: ret half [[TMP2]] +// +// UNSAFE-LABEL: define dso_local half @test__Float16_pre_inc( +// UNSAFE-SAME: ) #[[ATTR0]] { +// UNSAFE-NEXT: [[ENTRY:.*:]] +// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5) +// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]] +// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 +// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2 +// UNSAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2 +// UNSAFE-NEXT: ret half [[TMP2]] +// +_Float16 test__Float16_pre_inc() +{ + static _Atomic _Float16 n; + return ++n; +} +//. +// UNSAFE: [[META3]] = !{} +//. +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu index eeb23bc7e1c01..55ddb52da311e 100644 --- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu +++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu @@ -1,6 +1,10 @@ // RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \ -// RUN: -fnative-half-arguments-and-returns | FileCheck %s +// RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefixes=CHECK,SAFEIR %s + +// RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ +// RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \ +// RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics | FileCheck -check-prefixes=CHECK,UNSAFEIR %s // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -target-cpu gfx1100 -fnative-half-type \ @@ -18,24 +22,38 @@ __global__ void ffp1(float *p) { // CHECK-LABEL: @_Z4ffp1Pf - // CHECK: atomicrmw fadd ptr {{.*}} monotonic - // CHECK: atomicrmw fmax ptr {{.*}} monotonic - // CHECK: atomicrmw fmin ptr {{.*}} monotonic - // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic - // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic + // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4{{$}} + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4{{$}} + // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4{{$}} + // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4{{$}} + // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4{{$}} + // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}} + + // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // SAFE: _Z4ffp1Pf // SAFE: global_atomic_cmpswap // SAFE: global_atomic_cmpswap // SAFE: global_atomic_cmpswap // SAFE: global_atomic_cmpswap // SAFE: global_atomic_cmpswap + // SAFE: global_atomic_cmpswap + // UNSAFE: _Z4ffp1Pf // UNSAFE: global_atomic_add_f32 // UNSAFE: global_atomic_cmpswap // 
UNSAFE: global_atomic_cmpswap // UNSAFE: global_atomic_cmpswap // UNSAFE: global_atomic_cmpswap + // UNSAFE: global_atomic_cmpswap + __atomic_fetch_add(p, 1.0f, memory_order_relaxed); + __atomic_fetch_sub(p, 1.0f, memory_order_relaxed); __atomic_fetch_max(p, 1.0f, memory_order_relaxed); __atomic_fetch_min(p, 1.0f, memory_order_relaxed); __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT); @@ -44,23 +62,36 @@ __global__ void ffp1(float *p) { __global__ void ffp2(double *p) { // CHECK-LABEL: @_Z4ffp2Pd - // CHECK: atomicrmw fsub ptr {{.*}} monotonic - // CHECK: atomicrmw fmax ptr {{.*}} monotonic - // CHECK: atomicrmw fmin ptr {{.*}} monotonic - // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic - // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic + // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}} + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}} + // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}} + // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}} + // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}} + // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}} + + // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // SAFE-LABEL: @_Z4ffp2Pd // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 + // SAFE: global_atomic_cmpswap_b64 + // UNSAFE-LABEL: @_Z4ffp2Pd + // UNSAFE: global_atomic_add_f64 // UNSAFE: global_atomic_cmpswap_x2 // UNSAFE: global_atomic_cmpswap_x2 // UNSAFE: global_atomic_cmpswap_x2 // UNSAFE: global_atomic_max_f64 // UNSAFE: global_atomic_min_f64 + __atomic_fetch_add(p, 1.0, memory_order_relaxed); __atomic_fetch_sub(p, 1.0, memory_order_relaxed); __atomic_fetch_max(p, 1.0, memory_order_relaxed); __atomic_fetch_min(p, 1.0, memory_order_relaxed); @@ -71,11 +102,20 @@ __global__ void ffp2(double *p) { // long double is the same as double for amdgcn. 
__global__ void ffp3(long double *p) { // CHECK-LABEL: @_Z4ffp3Pe - // CHECK: atomicrmw fsub ptr {{.*}} monotonic - // CHECK: atomicrmw fmax ptr {{.*}} monotonic - // CHECK: atomicrmw fmin ptr {{.*}} monotonic - // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic - // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic + // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}} + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}} + // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}} + // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}} + // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}} + // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}} + + // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // SAFE-LABEL: @_Z4ffp3Pe // SAFE: global_atomic_cmpswap_b64 // SAFE: global_atomic_cmpswap_b64 @@ -88,6 +128,7 @@ __global__ void ffp3(long double *p) { // UNSAFE: global_atomic_cmpswap_x2 // UNSAFE: global_atomic_max_f64 // UNSAFE: global_atomic_min_f64 + __atomic_fetch_add(p, 1.0L, memory_order_relaxed); __atomic_fetch_sub(p, 1.0L, memory_order_relaxed); __atomic_fetch_max(p, 1.0L, memory_order_relaxed); __atomic_fetch_min(p, 1.0L, memory_order_relaxed); @@ -98,37 +139,52 @@ __global__ void ffp3(long double *p) { __device__ double ffp4(double *p, float f) { // CHECK-LABEL: @_Z4ffp4Pdf // CHECK: fpext float {{.*}} to double - // CHECK: atomicrmw fsub ptr {{.*}} monotonic + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}} + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} return __atomic_fetch_sub(p, f, memory_order_relaxed); } __device__ double ffp5(double *p, int i) { // CHECK-LABEL: @_Z4ffp5Pdi // CHECK: sitofp i32 {{.*}} to double - // CHECK: atomicrmw fsub ptr {{.*}} monotonic + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}} + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} return __atomic_fetch_sub(p, i, memory_order_relaxed); } __global__ void ffp6(_Float16 *p) { // CHECK-LABEL: @_Z4ffp6PDF16 - // CHECK: atomicrmw fadd ptr {{.*}} monotonic - // CHECK: atomicrmw fmax ptr {{.*}} monotonic - // CHECK: atomicrmw fmin ptr {{.*}} monotonic - // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic - // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic + // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2{{$}} + // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2{{$}} + // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2{{$}} + // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2{{$}} + // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2{{$}} + // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") 
monotonic, align 2{{$}} + + // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + // SAFE: _Z4ffp6PDF16 // SAFE: global_atomic_cmpswap // SAFE: global_atomic_cmpswap // SAFE: global_atomic_cmpswap // SAFE: global_atomic_cmpswap // SAFE: global_atomic_cmpswap + // SAFE: global_atomic_cmpswap + // UNSAFE: _Z4ffp6PDF16 // UNSAFE: global_atomic_cmpswap // UNSAFE: global_atomic_cmpswap // UNSAFE: global_atomic_cmpswap // UNSAFE: global_atomic_cmpswap // UNSAFE: global_atomic_cmpswap + // UNSAFE: global_atomic_cmpswap __atomic_fetch_add(p, 1.0, memory_order_relaxed); + __atomic_fetch_sub(p, 1.0, memory_order_relaxed); __atomic_fetch_max(p, 1.0, memory_order_relaxed); __atomic_fetch_min(p, 1.0, memory_order_relaxed); __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT); diff --git a/clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp b/clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp new file mode 100644 index 0000000000000..7a34113cec8fa --- /dev/null +++ b/clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp @@ -0,0 +1,59 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck -check-prefix=DEFAULT %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -munsafe-fp-atomics -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s + +#pragma omp declare target + +float fv, fx; +double dv, dx; + +// DEFAULT-LABEL: define hidden void @_Z15atomic_fadd_f32v( +// DEFAULT-SAME: ) #[[ATTR0:[0-9]+]] { +// DEFAULT-NEXT: [[ENTRY:.*:]] +// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4 +// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4 +// DEFAULT-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP0]] +// DEFAULT-NEXT: store float [[ADD]], ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4 +// DEFAULT-NEXT: ret void +// +// UNSAFE-FP-ATOMICS-LABEL: define hidden void @_Z15atomic_fadd_f32v( +// UNSAFE-FP-ATOMICS-SAME: ) #[[ATTR0:[0-9]+]] { +// UNSAFE-FP-ATOMICS-NEXT: [[ENTRY:.*:]] +// UNSAFE-FP-ATOMICS-NEXT: [[TMP0:%.*]] = load float, ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4 +// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]], !amdgpu.ignore.denormal.mode [[META5]] +// UNSAFE-FP-ATOMICS-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP0]] +// UNSAFE-FP-ATOMICS-NEXT: store float [[ADD]], ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4 +// UNSAFE-FP-ATOMICS-NEXT: ret void +// +void 
atomic_fadd_f32() {
+#pragma omp atomic capture
+  fv = fx = fx + fv;
+}
+
+// DEFAULT-LABEL: define hidden void @_Z15atomic_fadd_f64v(
+// DEFAULT-SAME: ) #[[ATTR0]] {
+// DEFAULT-NEXT:  [[ENTRY:.*:]]
+// DEFAULT-NEXT:    [[TMP0:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
+// DEFAULT-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8
+// DEFAULT-NEXT:    [[ADD:%.*]] = fadd double [[TMP1]], [[TMP0]]
+// DEFAULT-NEXT:    store double [[ADD]], ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
+// DEFAULT-NEXT:    ret void
+//
+// UNSAFE-FP-ATOMICS-LABEL: define hidden void @_Z15atomic_fadd_f64v(
+// UNSAFE-FP-ATOMICS-SAME: ) #[[ATTR0]] {
+// UNSAFE-FP-ATOMICS-NEXT:  [[ENTRY:.*:]]
+// UNSAFE-FP-ATOMICS-NEXT:    [[TMP0:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
+// UNSAFE-FP-ATOMICS-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// UNSAFE-FP-ATOMICS-NEXT:    [[ADD:%.*]] = fadd double [[TMP1]], [[TMP0]]
+// UNSAFE-FP-ATOMICS-NEXT:    store double [[ADD]], ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
+// UNSAFE-FP-ATOMICS-NEXT:    ret void
+//
+void atomic_fadd_f64() {
+#pragma omp atomic capture
+  dv = dx = dx + dv;
+}
+
+#pragma omp end declare target
+//.
+// UNSAFE-FP-ATOMICS: [[META5]] = !{}
+//.
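
For reference, the shape of IR this change produces can be reproduced with a few lines of the plain LLVM C++ API. The following is a minimal standalone sketch, not part of the patch: the harness (module, function "f", monotonic ordering) is assumed for illustration only, and it simply attaches the same two empty metadata nodes that the new AMDGPU hook adds to a float atomicrmw fadd when unsafe FP atomics are allowed.

```cpp
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::LLVMContext Ctx;
  llvm::Module M("unsafe-fp-atomic-sketch", Ctx);
  llvm::IRBuilder<> B(Ctx);

  // define void @f(ptr %p) with a single entry block.
  auto *FTy = llvm::FunctionType::get(
      B.getVoidTy(), {llvm::PointerType::getUnqual(Ctx)}, /*isVarArg=*/false);
  llvm::Function *F =
      llvm::Function::Create(FTy, llvm::Function::ExternalLinkage, "f", M);
  B.SetInsertPoint(llvm::BasicBlock::Create(Ctx, "entry", F));

  // atomicrmw fadd ptr %p, float 1.0 monotonic, align 4
  llvm::AtomicRMWInst *RMW = B.CreateAtomicRMW(
      llvm::AtomicRMWInst::FAdd, F->getArg(0),
      llvm::ConstantFP::get(B.getFloatTy(), 1.0), llvm::MaybeAlign(4),
      llvm::AtomicOrdering::Monotonic);

  // The two empty metadata nodes the AMDGPU hook attaches for an fadd on
  // float when unsafe FP atomics are allowed.
  llvm::MDNode *Empty = llvm::MDNode::get(Ctx, {});
  RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty);
  RMW->setMetadata("amdgpu.ignore.denormal.mode", Empty);

  B.CreateRetVoid();
  M.print(llvm::outs(), nullptr); // prints the annotated atomicrmw
  return 0;
}
```

Printed output would contain an atomicrmw of the same form the UNSAFE/UNSAFEIR check lines above expect, with the metadata carried per instruction rather than as a function attribute.

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits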