[clang] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions (PR #68197)
https://github.com/stepthomas updated https://github.com/llvm/llvm-project/pull/68197 >From ed42bb63a3fe4a840071b74a3f9613dda815aa29 Mon Sep 17 00:00:00 2001 From: Stephen Thomas Date: Wed, 4 Oct 2023 10:16:49 +0100 Subject: [PATCH] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions The BUFFER_ATOMIC_CSUB and GLOBAL_ATOMIC_CSUB instructions have encodings for non-value-returning forms, although actually using them isn't supported by hardware. However, these encodings aren't supported by the backend, meaning that they can't even be assembled or disassembled. Add support for the non-returning encodings, but gate actually using them in instruction selection behind a new feature FeatureAtomicCsubNoRtnInsts, which no target uses. This does allow the non-returning instructions to be tested manually and llvm.amdgcn.atomic.csub.ll is extended to cover them. The feature does not gate assembling or disassembling them, this is now not an error. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 9 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1 + llvm/lib/Target/AMDGPU/BUFInstructions.td | 19 +-- llvm/lib/Target/AMDGPU/FLATInstructions.td| 18 +-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++ llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + .../CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll | 51 --- llvm/test/MC/AMDGPU/gfx1030_err.s | 6 --- 8 files changed, 85 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index bf5a7b0a96977c7..9ec062d45ba9f48 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -693,6 +693,13 @@ def FeatureAtomicGlobalPkAddBF16Inst : SubtargetFeature<"atomic-global-pk-add-bf [FeatureFlatGlobalInsts] >; +def FeatureAtomicCsubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts", + "HasAtomicCsubNoRtnInsts", + "true", + "Has buffer_atomic_csub and global_atomic_csub instructions that don't " + "return original value" +>; + def FeatureFlatAtomicFaddF32Inst : SubtargetFeature<"flat-atomic-fadd-f32-inst", "HasFlatAtomicFaddF32Inst", @@ -1927,6 +1934,8 @@ def HasGWS : Predicate<"Subtarget->hasGWS()">; def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">; def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">; +def HasAtomicCsubNoRtnInsts : Predicate<"Subtarget->hasAtomicCsubNoRtnInsts()">; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 12ccfd29f26c030..81fc28d293021ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -621,6 +621,7 @@ defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op; defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_global_atomic_fmin : noret_op; defm int_amdgcn_global_atomic_fmax : noret_op; +defm int_amdgcn_global_atomic_csub : noret_op; defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op; defm int_amdgcn_ds_fadd_v2bf16 : noret_op; diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index cec35d1147bb0ae..3e71141431d7dc9 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1022,10 +1022,16 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics < "buffer_atomic_dec_x2", VReg_64, i64 >; -let SubtargetPredicate = HasGFX10_BEncoding in -defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN < - "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub ->; +let SubtargetPredicate = HasGFX10_BEncoding in { + defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN < +"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub + >; + + let OtherPredicates = [HasAtomicCsubNoRtnInsts] in +defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_NO_RTN < + "buffer_atomic_csub", VGPR_32, i32 +>; +} let SubtargetPredicate = isGFX8GFX9 in { def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">; @@ -1561,6 +1567,9 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">; defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">; defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">; +let SubtargetPredicate = HasAtomicCsubNoRtnInsts in +defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>; + let SubtargetPredicate = isGFX6GFX7GFX10Plus in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">; defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">; @@ -2491,7 +2500,7 @@ defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>; defm BUFFER_ATOMIC_FMIN_X2 : MUBU
[clang-tools-extra] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions (PR #68197)
https://github.com/stepthomas updated https://github.com/llvm/llvm-project/pull/68197 >From ed42bb63a3fe4a840071b74a3f9613dda815aa29 Mon Sep 17 00:00:00 2001 From: Stephen Thomas Date: Wed, 4 Oct 2023 10:16:49 +0100 Subject: [PATCH] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions The BUFFER_ATOMIC_CSUB and GLOBAL_ATOMIC_CSUB instructions have encodings for non-value-returning forms, although actually using them isn't supported by hardware. However, these encodings aren't supported by the backend, meaning that they can't even be assembled or disassembled. Add support for the non-returning encodings, but gate actually using them in instruction selection behind a new feature FeatureAtomicCsubNoRtnInsts, which no target uses. This does allow the non-returning instructions to be tested manually and llvm.amdgcn.atomic.csub.ll is extended to cover them. The feature does not gate assembling or disassembling them, this is now not an error. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 9 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1 + llvm/lib/Target/AMDGPU/BUFInstructions.td | 19 +-- llvm/lib/Target/AMDGPU/FLATInstructions.td| 18 +-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++ llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + .../CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll | 51 --- llvm/test/MC/AMDGPU/gfx1030_err.s | 6 --- 8 files changed, 85 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index bf5a7b0a96977c7..9ec062d45ba9f48 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -693,6 +693,13 @@ def FeatureAtomicGlobalPkAddBF16Inst : SubtargetFeature<"atomic-global-pk-add-bf [FeatureFlatGlobalInsts] >; +def FeatureAtomicCsubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts", + "HasAtomicCsubNoRtnInsts", + "true", + "Has buffer_atomic_csub and global_atomic_csub instructions that don't " + "return original value" +>; + def FeatureFlatAtomicFaddF32Inst : SubtargetFeature<"flat-atomic-fadd-f32-inst", "HasFlatAtomicFaddF32Inst", @@ -1927,6 +1934,8 @@ def HasGWS : Predicate<"Subtarget->hasGWS()">; def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">; def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">; +def HasAtomicCsubNoRtnInsts : Predicate<"Subtarget->hasAtomicCsubNoRtnInsts()">; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 12ccfd29f26c030..81fc28d293021ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -621,6 +621,7 @@ defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op; defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_global_atomic_fmin : noret_op; defm int_amdgcn_global_atomic_fmax : noret_op; +defm int_amdgcn_global_atomic_csub : noret_op; defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op; defm int_amdgcn_ds_fadd_v2bf16 : noret_op; diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index cec35d1147bb0ae..3e71141431d7dc9 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1022,10 +1022,16 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics < "buffer_atomic_dec_x2", VReg_64, i64 >; -let SubtargetPredicate = HasGFX10_BEncoding in -defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN < - "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub ->; +let SubtargetPredicate = HasGFX10_BEncoding in { + defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN < +"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub + >; + + let OtherPredicates = [HasAtomicCsubNoRtnInsts] in +defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_NO_RTN < + "buffer_atomic_csub", VGPR_32, i32 +>; +} let SubtargetPredicate = isGFX8GFX9 in { def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">; @@ -1561,6 +1567,9 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">; defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">; defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">; +let SubtargetPredicate = HasAtomicCsubNoRtnInsts in +defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>; + let SubtargetPredicate = isGFX6GFX7GFX10Plus in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">; defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">; @@ -2491,7 +2500,7 @@ defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>; defm BUFFER_ATOMIC_FMIN_X2 : MUBU
[libunwind] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions (PR #68197)
https://github.com/stepthomas updated https://github.com/llvm/llvm-project/pull/68197 >From ed42bb63a3fe4a840071b74a3f9613dda815aa29 Mon Sep 17 00:00:00 2001 From: Stephen Thomas Date: Wed, 4 Oct 2023 10:16:49 +0100 Subject: [PATCH] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions The BUFFER_ATOMIC_CSUB and GLOBAL_ATOMIC_CSUB instructions have encodings for non-value-returning forms, although actually using them isn't supported by hardware. However, these encodings aren't supported by the backend, meaning that they can't even be assembled or disassembled. Add support for the non-returning encodings, but gate actually using them in instruction selection behind a new feature FeatureAtomicCsubNoRtnInsts, which no target uses. This does allow the non-returning instructions to be tested manually and llvm.amdgcn.atomic.csub.ll is extended to cover them. The feature does not gate assembling or disassembling them, this is now not an error. --- llvm/lib/Target/AMDGPU/AMDGPU.td | 9 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1 + llvm/lib/Target/AMDGPU/BUFInstructions.td | 19 +-- llvm/lib/Target/AMDGPU/FLATInstructions.td| 18 +-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++ llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + .../CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll | 51 --- llvm/test/MC/AMDGPU/gfx1030_err.s | 6 --- 8 files changed, 85 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index bf5a7b0a96977c7..9ec062d45ba9f48 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -693,6 +693,13 @@ def FeatureAtomicGlobalPkAddBF16Inst : SubtargetFeature<"atomic-global-pk-add-bf [FeatureFlatGlobalInsts] >; +def FeatureAtomicCsubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts", + "HasAtomicCsubNoRtnInsts", + "true", + "Has buffer_atomic_csub and global_atomic_csub instructions that don't " + "return original value" +>; + def FeatureFlatAtomicFaddF32Inst : SubtargetFeature<"flat-atomic-fadd-f32-inst", "HasFlatAtomicFaddF32Inst", @@ -1927,6 +1934,8 @@ def HasGWS : Predicate<"Subtarget->hasGWS()">; def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">; def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">; +def HasAtomicCsubNoRtnInsts : Predicate<"Subtarget->hasAtomicCsubNoRtnInsts()">; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 12ccfd29f26c030..81fc28d293021ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -621,6 +621,7 @@ defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op; defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op; defm int_amdgcn_global_atomic_fmin : noret_op; defm int_amdgcn_global_atomic_fmax : noret_op; +defm int_amdgcn_global_atomic_csub : noret_op; defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op; defm int_amdgcn_ds_fadd_v2bf16 : noret_op; diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index cec35d1147bb0ae..3e71141431d7dc9 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1022,10 +1022,16 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics < "buffer_atomic_dec_x2", VReg_64, i64 >; -let SubtargetPredicate = HasGFX10_BEncoding in -defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN < - "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub ->; +let SubtargetPredicate = HasGFX10_BEncoding in { + defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN < +"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub + >; + + let OtherPredicates = [HasAtomicCsubNoRtnInsts] in +defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_NO_RTN < + "buffer_atomic_csub", VGPR_32, i32 +>; +} let SubtargetPredicate = isGFX8GFX9 in { def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">; @@ -1561,6 +1567,9 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">; defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">; defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">; +let SubtargetPredicate = HasAtomicCsubNoRtnInsts in +defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>; + let SubtargetPredicate = isGFX6GFX7GFX10Plus in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">; defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">; @@ -2491,7 +2500,7 @@ defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>; defm BUFFER_ATOMIC_FMIN_X2 : MUBU
[clang-tools-extra] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions (PR #68197)
https://github.com/stepthomas closed https://github.com/llvm/llvm-project/pull/68197 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits