https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/116308
>From 1eebc858ad7c42b9ef42adfac1a93aa79d7a80f0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 22 May 2024 19:23:24 +0200 Subject: [PATCH] AMDGPU: Add subtarget features for minimum3/maximum3 instructions gfx12 and gfx950 managed to produce 3 different permutations of this feature. gfx12 supports f32 and f16, and gfx950 supports f32 and v2f16. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 22 ++++++++++++++++++++++ llvm/lib/Target/AMDGPU/GCNSubtarget.h | 11 ++++++++++- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 4 ++-- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index d028c1f5ca7613..35dbf86b7c6f36 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -137,6 +137,18 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts", "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions" >; +def FeatureMinimum3Maximum3F32 : SubtargetFeature<"minimum3-maximum3-f32", + "HasMinimum3Maximum3F32", + "true", + "Has v_minimum3_f32 and v_maximum3_f32 instructions" +>; + +def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16", + "HasMinimum3Maximum3F16", + "true", + "Has v_minimum3_f16 and v_maximum3_f16 instructions" +>; + def FeatureSupportsXNACK : SubtargetFeature<"xnack-support", "SupportsXNACK", "true", @@ -1263,6 +1275,7 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12", FeatureUnalignedDSAccess, FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, + FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics ] >; @@ -2005,6 +2018,15 @@ def isGFX12Plus : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">, AssemblerPredicate<(all_of FeatureGFX12Insts)>; +def HasMinimum3Maximum3F32 : + Predicate<"Subtarget->hasMinimum3Maximum3F32()">, + AssemblerPredicate<(all_of FeatureMinimum3Maximum3F32)>; + +def HasMinimum3Maximum3F16 : + Predicate<"Subtarget->hasMinimum3Maximum3F16()">, + AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>; + + def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 1b06756a8a1016..2e7a06a15bd52a 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -242,7 +242,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasForceStoreSC0SC1 = false; bool HasRequiredExportPriority = false; bool HasVmemWriteVgprInOrder = false; - + bool HasMinimum3Maximum3F32 = false; + bool HasMinimum3Maximum3F16 = false; bool RequiresCOV6 = false; // Dummy feature to use for assembler in tablegen. @@ -1307,6 +1308,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, /// \returns true if the target has instructions with xf32 format support. bool hasXF32Insts() const { return HasXF32Insts; } + bool hasMinimum3Maximum3F32() const { + return HasMinimum3Maximum3F32; + } + + bool hasMinimum3Maximum3F16() const { + return HasMinimum3Maximum3F16; + } + /// \returns The maximum number of instructions that can be enclosed in an /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that /// instruction. diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 34ecdb56e8689d..551e8b3a679202 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -226,7 +226,7 @@ let mayRaiseFPException = 0 in { defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>; } // End mayRaiseFPException = 0 -let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { +let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in { defm V_MINIMUM3_F32 : VOP3Inst <"v_minimum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfminimum3>; defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>; } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 @@ -625,7 +625,7 @@ defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3 defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>; defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>; -let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { +let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in { defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>; defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>; } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits