llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> These have been replaced with atomicrmw --- Patch is 89.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105642.diff 18 Files Affected: - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (-5) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+16-9) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (-4) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (-4) - (modified) llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td (-4) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (-4) - (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (-11) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (-16) - (modified) llvm/test/Bitcode/amdgcn-atomic.ll (+64) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll (+32-204) - (modified) llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll (+5-4) - (removed) llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics-f64.ll (-51) - (removed) llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll (-83) - (removed) llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-f64.ll (-51) - (removed) llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics.ll (-87) - (modified) llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll (+4-360) - (removed) llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll (-224) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll (+6-57) ``````````diff diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index dc13a35c66f9ab..9f2a3a985a56b5 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2945,11 +2945,6 @@ def int_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v // gfx90a intrinsics // ===----------------------------------------------------------------------===// -def int_amdgcn_global_atomic_fmin : AMDGPUAtomicRtn<llvm_anyfloat_ty>; -def int_amdgcn_global_atomic_fmax : AMDGPUAtomicRtn<llvm_anyfloat_ty>; -def int_amdgcn_flat_atomic_fmin : AMDGPUAtomicRtn<llvm_anyfloat_ty>; -def int_amdgcn_flat_atomic_fmax : AMDGPUAtomicRtn<llvm_anyfloat_ty>; - defset list<Intrinsic> AMDGPUMFMAIntrinsics90A = { def int_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4i16_ty>; def int_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 8dd5b9b3ec3d1f..d2e00c928b1028 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1033,14 +1033,17 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, break; // No other 'amdgcn.atomic.*' } - if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") || - Name.starts_with("ds.fmax") || - Name.starts_with("global.atomic.fadd") || - Name.starts_with("flat.atomic.fadd")) { - // Replaced with atomicrmw fadd/fmin/fmax, so there's no new - // declaration. - NewFn = nullptr; - return true; + if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") || + Name.consume_front("flat.atomic.")) { + if (Name.starts_with("fadd") || + // FIXME: We should also remove fmin.num and fmax.num intrinsics. + (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) || + (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) { + // Replaced with atomicrmw fadd/fmin/fmax, so there's no new + // declaration. + NewFn = nullptr; + return true; + } } if (Name.starts_with("ldexp.")) { @@ -4046,7 +4049,11 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap) .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd) - .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd); + .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd) + .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin) + .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin) + .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax) + .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax); unsigned NumOperands = CI->getNumOperands(); if (NumOperands < 3) // Malformed bitcode. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index aa5b151adef3a4..09987a6504b9d0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -618,10 +618,6 @@ multiclass local_addr_space_atomic_op { } } -defm int_amdgcn_flat_atomic_fmin : noret_op; -defm int_amdgcn_flat_atomic_fmax : noret_op; -defm int_amdgcn_global_atomic_fmin : noret_op; -defm int_amdgcn_global_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_csub : noret_op; defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op; defm int_amdgcn_flat_atomic_fmin_num : noret_op; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 69a1936a11fe05..126fc4d9672d8d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4897,12 +4897,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case Intrinsic::amdgcn_global_atomic_csub: - case Intrinsic::amdgcn_global_atomic_fmin: - case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmin_num: case Intrinsic::amdgcn_global_atomic_fmax_num: - case Intrinsic::amdgcn_flat_atomic_fmin: - case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmin_num: case Intrinsic::amdgcn_flat_atomic_fmax_num: case Intrinsic::amdgcn_atomic_cond_sub_u32: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index 95c4859674ecc4..40423ed3e25871 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -239,13 +239,9 @@ def : SourceOfDivergence<int_r600_read_tidig_y>; def : SourceOfDivergence<int_r600_read_tidig_z>; def : SourceOfDivergence<int_amdgcn_atomic_cond_sub_u32>; def : SourceOfDivergence<int_amdgcn_global_atomic_csub>; -def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>; -def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>; def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>; def : SourceOfDivergence<int_amdgcn_global_atomic_fmax_num>; def : SourceOfDivergence<int_amdgcn_global_atomic_ordered_add_b64>; -def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>; -def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>; def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin_num>; def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax_num>; def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index cb3fbdb850c1ac..b615639ac1b82e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1045,8 +1045,6 @@ bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, switch (IID) { case Intrinsic::amdgcn_is_shared: case Intrinsic::amdgcn_is_private: - case Intrinsic::amdgcn_flat_atomic_fmax: - case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmax_num: case Intrinsic::amdgcn_flat_atomic_fmin_num: OpIndexes.push_back(0); @@ -1106,8 +1104,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy}, {NewV, MaskOp}); } - case Intrinsic::amdgcn_flat_atomic_fmax: - case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmax_num: case Intrinsic::amdgcn_flat_atomic_fmin_num: { Type *DestTy = II->getType(); diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 7b3822067072e5..d5d1d27c3a850f 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1604,15 +1604,11 @@ let OtherPredicates = [isGFX12Plus] in { let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in { defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>; -defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>; -defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>; } let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in { defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>; defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>; -defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>; -defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>; } let OtherPredicates = [isGFX12Only] in { @@ -1642,13 +1638,6 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_globa let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in { defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>; -defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>; -defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>; -} - -let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in { -defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>; -defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>; } let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c954c0aa71f734..464e77f66acfab 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1351,13 +1351,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, MachineMemOperand::MODereferenceable; return true; } - case Intrinsic::amdgcn_global_atomic_fmin: - case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmin_num: case Intrinsic::amdgcn_global_atomic_fmax_num: case Intrinsic::amdgcn_global_atomic_ordered_add_b64: - case Intrinsic::amdgcn_flat_atomic_fmin: - case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmin_num: case Intrinsic::amdgcn_flat_atomic_fmax_num: case Intrinsic::amdgcn_atomic_cond_sub_u32: { @@ -1462,14 +1458,10 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II, case Intrinsic::amdgcn_ds_consume: case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_swap: - case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmax_num: - case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmin_num: case Intrinsic::amdgcn_global_atomic_csub: - case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmax_num: - case Intrinsic::amdgcn_global_atomic_fmin: case Intrinsic::amdgcn_global_atomic_fmin_num: case Intrinsic::amdgcn_global_atomic_ordered_add_b64: case Intrinsic::amdgcn_global_load_tr_b64: @@ -9285,12 +9277,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, DAG.setNodeMemRefs(NewNode, {MemRef}); return SDValue(NewNode, 0); } - case Intrinsic::amdgcn_global_atomic_fmin: - case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmin_num: case Intrinsic::amdgcn_global_atomic_fmax_num: - case Intrinsic::amdgcn_flat_atomic_fmin: - case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmin_num: case Intrinsic::amdgcn_flat_atomic_fmax_num: { MemSDNode *M = cast<MemSDNode>(Op); @@ -9301,16 +9289,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, }; unsigned Opcode = 0; switch (IntrID) { - case Intrinsic::amdgcn_global_atomic_fmin: case Intrinsic::amdgcn_global_atomic_fmin_num: - case Intrinsic::amdgcn_flat_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmin_num: { Opcode = ISD::ATOMIC_LOAD_FMIN; break; } - case Intrinsic::amdgcn_global_atomic_fmax: case Intrinsic::amdgcn_global_atomic_fmax_num: - case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmax_num: { Opcode = ISD::ATOMIC_LOAD_FMAX; break; diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll index d642372799f56b..af3338577f7163 100644 --- a/llvm/test/Bitcode/amdgcn-atomic.ll +++ b/llvm/test/Bitcode/amdgcn-atomic.ll @@ -354,4 +354,68 @@ define float @upgrade_amdgcn_global_atomic_fadd_f32_p1_f32(ptr addrspace(1) %ptr ret float %result } +declare float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr nocapture, float) #0 + +define float @upgrade_amdgcn_flat_atomic_fmin_f32_p0_f32(ptr %ptr, float %data) { + ; CHECK: %{{.+}} = atomicrmw fmin ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %ptr, float %data) + ret float %result +} + +declare float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) nocapture, float) #0 + +define float @upgrade_amdgcn_global_atomic_fmin_f32_p1_f32(ptr addrspace(1) %ptr, float %data) { + ; CHECK: %{{.+}} = atomicrmw fmin ptr addrspace(1) %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) %ptr, float %data) + ret float %result +} + +declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr nocapture, double) #0 + +define double @upgrade_amdgcn_flat_atomic_fmin_f64_p0_f64(ptr %ptr, double %data) { + ; CHECK: %{{.+}} = atomicrmw fmin ptr %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) + ret double %result +} + +declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) nocapture, double) #0 + +define double @upgrade_amdgcn_global_atomic_fmin_f64_p1_f64(ptr addrspace(1) %ptr, double %data) { + ; CHECK: %{{.+}} = atomicrmw fmin ptr addrspace(1) %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) + ret double %result +} + +declare float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr nocapture, float) #0 + +define float @upgrade_amdgcn_flat_atomic_fmax_f32_p0_f32(ptr %ptr, float %data) { + ; CHECK: %{{.+}} = atomicrmw fmax ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %ptr, float %data) + ret float %result +} + +declare float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) nocapture, float) #0 + +define float @upgrade_amdgcn_global_atomic_fmax_f32_p1_f32(ptr addrspace(1) %ptr, float %data) { + ; CHECK: %{{.+}} = atomicrmw fmax ptr addrspace(1) %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) %ptr, float %data) + ret float %result +} + +declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr nocapture, double) #0 + +define double @upgrade_amdgcn_flat_atomic_fmax_f64_p0_f64(ptr %ptr, double %data) { + ; CHECK: %{{.+}} = atomicrmw fmax ptr %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) + ret double %result +} + +declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) nocapture, double) #0 + +define double @upgrade_amdgcn_global_atomic_fmax_f64_p1_f64(ptr addrspace(1) %ptr, double %data) { + ; CHECK: %{{.+}} = atomicrmw fmax ptr addrspace(1) %ptr, double %data syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + %result = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) + ret double %result +} + attributes #0 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll index eb39ca2d7daa7f..92ce2af47e22ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll @@ -14,10 +14,6 @@ declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32 declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg) declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg) declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg) -declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) -declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) { ; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64: @@ -1015,52 +1011,6 @@ main_body: ret void } -define amdgpu_kernel void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, double %data) { -; GFX90A-LABEL: global_atomic_fmin_f64_noret: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5] -; GFX90A-NEXT: s_endpgm -; -; GFX940-LABEL: global_atomic_fmin_f64_noret: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5] -; GFX940-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - ret void -} - -define amdgpu_kernel void @global_atomic_fmax_f64_noret(ptr addrspace(1) %ptr, double %data) { -; GFX90A-LABEL: global_atomic_fmax_f64_noret: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5] -; GFX90A-NEXT: s_endpgm -; -; GFX940-LABEL: global_atomic_fmax_f64_noret: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5] -; GFX940-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) - ret void -} - define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body @@ -1070,7 +1020,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt ; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX90A-NEXT: s_cbranch_execz .LBB38_2 +; GFX90A-NEXT: s_cbranch_execz .LBB36_2 ; GFX90A-NEXT: ; %bb.1: ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -1083,7 +1033,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol -; GFX90A-NEXT: .LBB38_2: +; GFX90A-NEXT: .LBB36_2: ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: global_atomic_fadd_f64_noret_pat: @@ -1094,7 +1044,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt ; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0 ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX940-NEXT: s_cbranch_execz .LBB38_2 +; GFX940-NEXT: s_cbranch_execz .LBB36_2 ; GFX940-NEXT: ; %bb.1: ; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24 ; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] @@ -... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/105642 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits