llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes:

Use this to replace the emission of the amdgpu-unsafe-fp-atomics attribute in favor of per-instruction metadata. In the future new fine grained controls should be introduced that also cover the integer cases.

Add a wrapper around CreateAtomicRMW that appends the metadata, and update a few use contexts to use it.

---

Patch is 38.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96906.diff

9 Files Affected:

- (modified) clang/lib/CodeGen/CGAtomic.cpp (+12-1)
- (modified) clang/lib/CodeGen/CGExprScalar.cpp (+7-6)
- (modified) clang/lib/CodeGen/CGStmtOpenMP.cpp (+2-2)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+7)
- (modified) clang/lib/CodeGen/TargetInfo.h (+4)
- (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+19)
- (added) clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c (+316)
- (modified) clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu (+79-23)
- (added) clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp (+59)

``````````diff
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index fbf942d06ca6e..fbe9569e50ef6 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -727,7 +727,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
 
   llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
   llvm::AtomicRMWInst *RMWI =
-      CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order, Scope);
+      CGF.emitAtomicRMWInst(Op, Ptr, LoadVal1, Order, Scope);
   RMWI->setVolatile(E->isVolatile());
 
   // For __atomic_*_fetch operations, perform the operation again to
@@ -2034,6 +2034,17 @@ std::pair<RValue, llvm::Value *> CodeGenFunction::EmitAtomicCompareExchange(
       IsWeak);
 }
 
+llvm::AtomicRMWInst *
+CodeGenFunction::emitAtomicRMWInst(llvm::AtomicRMWInst::BinOp Op, Address Addr,
+                                   llvm::Value *Val, llvm::AtomicOrdering Order,
+                                   llvm::SyncScope::ID SSID) {
+
+  llvm::AtomicRMWInst *RMW =
+      Builder.CreateAtomicRMW(Op, Addr, Val, Order, SSID);
+  getTargetHooks().setTargetAtomicMetadata(*this, *RMW);
+  return RMW;
+}
+
 void CodeGenFunction::EmitAtomicUpdate(
     LValue LVal, llvm::AtomicOrdering AO,
     const llvm::function_ref<RValue(RValue)> &UpdateOp, bool IsVolatile) {
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index f40f3c273206b..8eb7a64c144c8 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2835,9 +2835,10 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
           isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
       llvm::Value *amt = llvm::ConstantFP::get(
           VMContext, llvm::APFloat(static_cast<float>(1.0)));
-      llvm::Value *old =
-          Builder.CreateAtomicRMW(aop, LV.getAddress(), amt,
-                                  llvm::AtomicOrdering::SequentiallyConsistent);
+      llvm::AtomicRMWInst *old =
+          CGF.emitAtomicRMWInst(aop, LV.getAddress(), amt,
+                                llvm::AtomicOrdering::SequentiallyConsistent);
+
       return isPre ? Builder.CreateBinOp(op, old, amt) : old;
     }
     value = EmitLoadOfLValue(LV, E->getExprLoc());
@@ -3577,9 +3578,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
           EmitScalarConversion(OpInfo.RHS, E->getRHS()->getType(), LHSTy,
                                E->getExprLoc()),
           LHSTy);
-      Value *OldVal = Builder.CreateAtomicRMW(
-          AtomicOp, LHSLV.getAddress(), Amt,
-          llvm::AtomicOrdering::SequentiallyConsistent);
+
+      llvm::AtomicRMWInst *OldVal =
+          CGF.emitAtomicRMWInst(AtomicOp, LHSLV.getAddress(), Amt);
 
       // Since operation is atomic, the result type is guaranteed to be the
       // same as the input in LLVM terms.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f73d32de7c484..8c152fef73557 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -6235,8 +6235,8 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
     UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
                                        X.getAddress().getElementType());
   }
-  llvm::Value *Res =
-      CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
+  llvm::AtomicRMWInst *Res =
+      CGF.emitAtomicRMWInst(RMWOp, X.getAddress(), UpdateVal, AO);
   return std::make_pair(true, RValue::get(Res));
 }
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 13f12b5d878a6..6cfcb76eea42a 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4153,6 +4153,13 @@ class CodeGenFunction : public CodeGenTypeCache {
           llvm::AtomicOrdering::SequentiallyConsistent,
       bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored());
 
+  /// Emit an atomicrmw instruction, and applying relevant metadata when
+  /// applicable.
+  llvm::AtomicRMWInst *emitAtomicRMWInst(
+      llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val,
+      llvm::AtomicOrdering Order = llvm::AtomicOrdering::SequentiallyConsistent,
+      llvm::SyncScope::ID SSID = llvm::SyncScope::System);
+
   void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO,
                         const llvm::function_ref<RValue(RValue)> &UpdateOp,
                         bool IsVolatile);
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index f242d9e36ed40..1bd821e7206b9 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -333,6 +333,10 @@ class TargetCodeGenInfo {
                                          llvm::AtomicOrdering Ordering,
                                          llvm::LLVMContext &Ctx) const;
 
+  /// Allow the target to apply other metadata to an atomic instruction
+  virtual void setTargetAtomicMetadata(CodeGenFunction &CGF,
+                                       llvm::AtomicRMWInst &RMW) const {}
+
   /// Interface class for filling custom fields of a block literal for OpenCL.
   class TargetOpenCLBlockHelper {
   public:
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4d3275e17c386..37e6af3d4196a 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -311,6 +311,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
                                          SyncScope Scope,
                                          llvm::AtomicOrdering Ordering,
                                          llvm::LLVMContext &Ctx) const override;
+  void setTargetAtomicMetadata(CodeGenFunction &CGF,
+                               llvm::AtomicRMWInst &RMW) const override;
   llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
                                          llvm::Function *BlockInvokeFunc,
                                          llvm::Type *BlockTy) const override;
@@ -546,6 +548,23 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
   return Ctx.getOrInsertSyncScopeID(Name);
 }
 
+void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
+    CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {
+  if (!CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
+    return;
+
+  // TODO: Introduce new, more controlled options that also work for integers,
+  // and deprecate allowAMDGPUUnsafeFPAtomics.
+  llvm::AtomicRMWInst::BinOp RMWOp = RMW.getOperation();
+  if (llvm::AtomicRMWInst::isFPOperation(RMWOp)) {
+    llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
+    RMW.setMetadata("amdgpu.no.fine.grained.memory", Empty);
+
+    if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW.getType()->isFloatTy())
+      RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
+  }
+}
+
 bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
   return false;
 }
diff --git a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
new file mode 100644
index 0000000000000..6deff1116e1d8
--- /dev/null
+++ b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
@@ -0,0 +1,316 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -fnative-half-arguments-and-returns -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,SAFE %s
+// RUN: %clang_cc1 -fnative-half-arguments-and-returns -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -munsafe-fp-atomics -o - %s | FileCheck -check-prefixes=CHECK,UNSAFE %s
+
+// SAFE-LABEL: define dso_local float @test_float_post_inc(
+// SAFE-SAME: ) #[[ATTR0:[0-9]+]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: ret float [[TMP0]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_post_inc(
+// UNSAFE-SAME: ) #[[ATTR0:[0-9]+]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: ret float [[TMP0]]
+//
+float test_float_post_inc()
+{
+  static _Atomic float n;
+  return n++;
+}
+
+// SAFE-LABEL: define dso_local float @test_float_post_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: ret float [[TMP0]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_post_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: ret float [[TMP0]]
+//
+float test_float_post_dc()
+{
+  static _Atomic float n;
+  return n--;
+}
+
+// SAFE-LABEL: define dso_local float @test_float_pre_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: ret float [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_pre_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: ret float [[TMP1]]
+//
+float test_float_pre_dc()
+{
+  static _Atomic float n;
+  return --n;
+}
+
+// SAFE-LABEL: define dso_local float @test_float_pre_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: ret float [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_pre_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: ret float [[TMP1]]
+//
+float test_float_pre_inc()
+{
+  static _Atomic float n;
+  return ++n;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_post_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_post_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP1]]
+//
+double test_double_post_inc()
+{
+  static _Atomic double n;
+  return n++;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_post_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_post_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP1]]
+//
+double test_double_post_dc()
+{
+  static _Atomic double n;
+  return n--;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_pre_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP2]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_pre_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP2]]
+//
+double test_double_pre_dc()
+{
+  static _Atomic double n;
+  return --n;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_pre_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP2]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_pre_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP2]]
+//
+double test_double_pre_inc()
+{
+  static _Atomic double n;
+  return ++n;
+}
+
+// SAFE-LABEL: define dso_local half @test__Float16_post_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2
+// SAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
+// SAFE-NEXT: ret half [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local half @test__Float16_post_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2
+// UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
+// UNSAFE-NEXT: ret half [[TMP1]]
+//
+_Float16 test__Float16_post_inc()
+{
+  static _Atomic _Float16 n;
+  return n++;
+}
+
+// SAFE-LABEL: define dso_local half @test__Float16_post_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 2
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2... [truncated]
``````````

https://github.com/llvm/llvm-project/pull/96906
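For quick reference, the IR shape the UNSAFE prefixes above check for looks roughly like the sketch below. This is a hand-written illustration rather than part of the patch or actual compiler output; the function and value names are invented, and only the placement of the metadata matters:

```llvm
; Sketch: with -munsafe-fp-atomics, the relaxation is recorded on the
; instruction itself instead of through the function-level
; "amdgpu-unsafe-fp-atomics"="true" attribute.
define float @increment_by_one(ptr %p) {
entry:
  ; Every floating-point atomicrmw gets !amdgpu.no.fine.grained.memory; an
  ; fadd on float additionally gets !amdgpu.ignore.denormal.mode.
  %old = atomicrmw fadd ptr %p, float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
  ret float %old
}

!0 = !{}
```

The SAFE run lines expect the same atomicrmw instructions without the metadata; the two RUN lines in the test differ only in -munsafe-fp-atomics.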