https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/96739
These have been replaced with atomicrmw. From e95c252f91dea9dbb89711eb3b851fcfe6555f7c Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Tue, 11 Jun 2024 11:46:15 +0200 Subject: [PATCH] AMDGPU: Remove ds_fmin/ds_fmax intrinsics These have been replaced with atomicrmw. --- llvm/docs/ReleaseNotes.rst | 5 ++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 14 ----- llvm/lib/IR/AutoUpgrade.cpp | 8 ++- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 32 ------------ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3 -- .../Target/AMDGPU/AMDGPUSearchableTables.td | 2 - .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 20 +------ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 15 +----- llvm/test/Bitcode/amdgcn-atomic.ll | 52 +++++++++++++++++++ 9 files changed, 65 insertions(+), 86 deletions(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 76356dd76f1d2..7644da2b78bd7 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -132,6 +132,11 @@ Changes to the AMDGPU Backend * Implemented :ref:`llvm.get.rounding <int_get_rounding>` and :ref:`llvm.set.rounding <int_set_rounding>` +* Removed ``llvm.amdgcn.ds.fadd``, ``llvm.amdgcn.ds.fmin`` and + ``llvm.amdgcn.ds.fmax`` intrinsics. Users should use the + :ref:`atomicrmw <i_atomicrmw>` instruction with ``fadd``, ``fmin`` and + ``fmax`` with addrspace(3) instead.
+ Changes to the ARM Backend -------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 11662ccc1a695..2aa52ef99aaf8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -523,17 +523,6 @@ def int_amdgcn_fmad_ftz : [IntrNoMem, IntrSpeculatable] >; -class AMDGPULDSIntrin : - Intrinsic<[llvm_any_ty], - [LLVMQualPointerType<3>, - LLVMMatchType<0>, - llvm_i32_ty, // ordering - llvm_i32_ty, // scope - llvm_i1_ty], // isVolatile - [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, - ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree] ->; - // FIXME: The m0 argument should be moved after the normal arguments class AMDGPUDSOrderedIntrinsic : Intrinsic< [llvm_i32_ty], @@ -571,9 +560,6 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; -def int_amdgcn_ds_fmin : AMDGPULDSIntrin; -def int_amdgcn_ds_fmax : AMDGPULDSIntrin; - } // TargetPrefix = "amdgcn" // New-style image intrinsics diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d7825d9b3e3e5..32076a07d30e7 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1033,8 +1033,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, break; // No other 'amdgcn.atomic.*' } - if (Name.starts_with("ds.fadd")) { - // Replaced with atomicrmw fadd, so there's no new declaration. + if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || + Name.starts_with("ds.fmax")) { + // Replaced with atomicrmw fadd/fmin/fmax, so there's no new + // declaration. 
NewFn = nullptr; return true; } @@ -2347,6 +2349,8 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst::BinOp RMWOp = StringSwitch<AtomicRMWInst::BinOp>(Name) .StartsWith("ds.fadd", AtomicRMWInst::FAdd) + .StartsWith("ds.fmin", AtomicRMWInst::FMin) + .StartsWith("ds.fmax", AtomicRMWInst::FMax) .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4b48091b7143e..83a5933ceaed6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5401,35 +5401,6 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI, return true; } -static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::amdgcn_ds_fmin: - return AMDGPU::G_ATOMICRMW_FMIN; - case Intrinsic::amdgcn_ds_fmax: - return AMDGPU::G_ATOMICRMW_FMAX; - default: - llvm_unreachable("not a DS FP intrinsic"); - } -} - -bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, - MachineInstr &MI, - Intrinsic::ID IID) const { - GISelChangeObserver &Observer = Helper.Observer; - Observer.changingInstr(MI); - - MI.setDesc(ST.getInstrInfo()->get(getDSFPAtomicOpcode(IID))); - - // The remaining operands were used to set fields in the MemOperand on - // construction. - for (int I = 6; I > 3; --I) - MI.removeOperand(I); - - MI.removeOperand(1); // Remove the intrinsic ID. 
- Observer.changedInstr(MI); - return true; -} - // TODO: Fix pointer type handling bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI, @@ -7423,9 +7394,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return legalizeBufferAtomic(MI, B, IntrID); case Intrinsic::amdgcn_rsq_clamp: return legalizeRsqClampIntrinsic(MI, MRI, B); - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: - return legalizeDSAtomicFPIntrinsic(Helper, MI, IntrID); case Intrinsic::amdgcn_image_bvh_intersect_ray: return legalizeBVHIntrinsic(MI, B); case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index ae01bb29c1108..db1c5874093a7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -175,9 +175,6 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeRsqClampIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; - bool legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, - MachineInstr &MI, Intrinsic::ID IID) const; - bool getImplicitArgPtr(Register DstReg, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index ed5bae3e4ff61..a323f63767737 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -252,8 +252,6 @@ def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin_num>; def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax_num>; def : SourceOfDivergence<int_amdgcn_global_atomic_fadd_v2bf16>; def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd_v2bf16>; -def : SourceOfDivergence<int_amdgcn_ds_fmin>; -def : SourceOfDivergence<int_amdgcn_ds_fmax>; def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>; def : 
SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>; def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 1192b49fd1f08..8882839ed8de3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -501,9 +501,7 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const { switch (Inst->getIntrinsicID()) { case Intrinsic::amdgcn_ds_ordered_add: - case Intrinsic::amdgcn_ds_ordered_swap: - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: { + case Intrinsic::amdgcn_ds_ordered_swap: { auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2)); auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4)); if (!Ordering || !Volatile) @@ -1018,8 +1016,6 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const { bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const { switch (IID) { - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: case Intrinsic::amdgcn_is_shared: case Intrinsic::amdgcn_is_private: case Intrinsic::amdgcn_flat_atomic_fadd: @@ -1039,20 +1035,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *NewV) const { auto IntrID = II->getIntrinsicID(); switch (IntrID) { - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: { - const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4)); - if (!IsVolatile->isZero()) - return nullptr; - Module *M = II->getParent()->getParent()->getParent(); - Type *DestTy = II->getType(); - Type *SrcTy = NewV->getType(); - Function *NewDecl = - Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy}); - II->setArgOperand(0, NewV); - II->setCalledFunction(NewDecl); - return II; - } case Intrinsic::amdgcn_is_shared: case Intrinsic::amdgcn_is_private: { 
unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ? diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index b8ff5ed35ac80..0ed6d685ecfb0 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1279,9 +1279,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, switch (IntrID) { case Intrinsic::amdgcn_ds_ordered_add: - case Intrinsic::amdgcn_ds_ordered_swap: - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: { + case Intrinsic::amdgcn_ds_ordered_swap: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); @@ -1450,8 +1448,6 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II, case Intrinsic::amdgcn_atomic_cond_sub_u32: case Intrinsic::amdgcn_ds_append: case Intrinsic::amdgcn_ds_consume: - case Intrinsic::amdgcn_ds_fmax: - case Intrinsic::amdgcn_ds_fmin: case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_swap: case Intrinsic::amdgcn_flat_atomic_fadd: @@ -8869,15 +8865,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, M->getVTList(), Ops, M->getMemoryVT(), M->getMemOperand()); } - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: { - MemSDNode *M = cast<MemSDNode>(Op); - unsigned Opc = IntrID == Intrinsic::amdgcn_ds_fmin ? 
ISD::ATOMIC_LOAD_FMIN - : ISD::ATOMIC_LOAD_FMAX; - return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(), M->getOperand(0), - M->getOperand(2), M->getOperand(3), - M->getMemOperand()); - } case Intrinsic::amdgcn_raw_buffer_load: case Intrinsic::amdgcn_raw_ptr_buffer_load: case Intrinsic::amdgcn_raw_buffer_load_format: diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll index 311bd8863859b..ed7b04a2f3146 100644 --- a/llvm/test/Bitcode/amdgcn-atomic.ll +++ b/llvm/test/Bitcode/amdgcn-atomic.ll @@ -248,4 +248,56 @@ define <2 x i16> @upgrade_amdgcn_ds_fadd_v2bf16__missing_args_as_i16(ptr addrspa ret <2 x i16> %result0 } +declare float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) +declare double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) nocapture, double, i32 immarg, i32 immarg, i1 immarg) + +define float @upgrade_amdgcn_ds_fmin_f32(ptr addrspace(3) %ptr, float %val) { + ; CHECK: atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4 + %result0 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) + + ; CHECK: = atomicrmw volatile fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4 + %result1 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true) + + ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4 + %result2 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 43, i32 3, i1 false) + + ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") acquire, align 4 + %result3 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 4, i32 2, i1 false) + + ret float %result3 +} + +define double @upgrade_amdgcn_ds_fmin_f64(ptr addrspace(3) %ptr, double %val) { + ; CHECK: atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 
8 + %result0 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 false) + + ; CHECK: = atomicrmw volatile fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8 + %result1 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 true) + + ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8 + %result2 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 43, i32 3, i1 false) + + ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") acquire, align 8 + %result3 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 4, i32 2, i1 false) + + ret double %result3 +} + +declare float @llvm.amdgcn.ds.fmin(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) + +define float @upgrade_amdgcn_ds_fmin_f32_no_suffix(ptr addrspace(3) %ptr, float %val) { + ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4 + + %result0 = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) + ret float %result0 +} + +declare float @llvm.amdgcn.ds.fmax(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) + +define float @upgrade_amdgcn_ds_fmax_f32_no_suffix(ptr addrspace(3) %ptr, float %val) { + ; CHECK: = atomicrmw fmax ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4 + %result0 = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) + ret float %result0 +} + attributes #0 = { argmemonly nounwind willreturn } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits