[clang] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions (PR #68197)

2023-10-05 Thread Stephen Thomas via cfe-commits

https://github.com/stepthomas updated 
https://github.com/llvm/llvm-project/pull/68197

>From ed42bb63a3fe4a840071b74a3f9613dda815aa29 Mon Sep 17 00:00:00 2001
From: Stephen Thomas 
Date: Wed, 4 Oct 2023 10:16:49 +0100
Subject: [PATCH] [AMDGPU] Add encoding/decoding support for
 non-result-returning ATOMIC_CSUB instructions

The BUFFER_ATOMIC_CSUB and GLOBAL_ATOMIC_CSUB instructions have encodings for
non-value-returning forms, although actually using them isn't supported by
hardware. However, these encodings aren't supported by the backend, meaning
that they can't even be assembled or disassembled.

Add support for the non-returning encodings, but gate actually using them
in instruction selection behind a new feature FeatureAtomicCsubNoRtnInsts,
which no target uses. This does allow the non-returning instructions to be
tested manually and llvm.amdgcn.atomic.csub.ll is extended to cover them.
The feature does not gate assembling or disassembling them, this is now
not an error.
---
 llvm/lib/Target/AMDGPU/AMDGPU.td  |  9 
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |  1 +
 llvm/lib/Target/AMDGPU/BUFInstructions.td | 19 +--
 llvm/lib/Target/AMDGPU/FLATInstructions.td| 18 +--
 llvm/lib/Target/AMDGPU/GCNSubtarget.h |  4 ++
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |  1 +
 .../CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll | 51 ---
 llvm/test/MC/AMDGPU/gfx1030_err.s |  6 ---
 8 files changed, 85 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index bf5a7b0a96977c7..9ec062d45ba9f48 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -693,6 +693,13 @@ def FeatureAtomicGlobalPkAddBF16Inst : 
SubtargetFeature<"atomic-global-pk-add-bf
  [FeatureFlatGlobalInsts]
 >;
 
+def FeatureAtomicCsubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts",
+  "HasAtomicCsubNoRtnInsts",
+  "true",
+  "Has buffer_atomic_csub and global_atomic_csub instructions that don't "
+  "return original value"
+>;
+
 def FeatureFlatAtomicFaddF32Inst
   : SubtargetFeature<"flat-atomic-fadd-f32-inst",
   "HasFlatAtomicFaddF32Inst",
@@ -1927,6 +1934,8 @@ def HasGWS : Predicate<"Subtarget->hasGWS()">;
 def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
 def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
 
+def HasAtomicCsubNoRtnInsts : 
Predicate<"Subtarget->hasAtomicCsubNoRtnInsts()">;
+
 // Include AMDGPU TD files
 include "SISchedule.td"
 include "GCNProcessors.td"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 12ccfd29f26c030..81fc28d293021ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -621,6 +621,7 @@ defm int_amdgcn_flat_atomic_fadd : 
global_addr_space_atomic_op;
 defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
 defm int_amdgcn_global_atomic_fmin : noret_op;
 defm int_amdgcn_global_atomic_fmax : noret_op;
+defm int_amdgcn_global_atomic_csub : noret_op;
 defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
 defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
 
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td 
b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index cec35d1147bb0ae..3e71141431d7dc9 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1022,10 +1022,16 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
   "buffer_atomic_dec_x2", VReg_64, i64
 >;
 
-let SubtargetPredicate = HasGFX10_BEncoding in
-defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
-  "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
->;
+let SubtargetPredicate = HasGFX10_BEncoding in {
+  defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
+"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
+  >;
+
+  let OtherPredicates = [HasAtomicCsubNoRtnInsts] in
+defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_NO_RTN <
+  "buffer_atomic_csub", VGPR_32, i32
+>;
+}
 
 let SubtargetPredicate = isGFX8GFX9 in {
 def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
@@ -1561,6 +1567,9 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, 
"BUFFER_ATOMIC_XOR_X2">;
 defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">;
 defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;
 
+let SubtargetPredicate = HasAtomicCsubNoRtnInsts in
+defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", 
["noret"]>;
+
 let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
@@ -2491,7 +2500,7 @@ defm BUFFER_ATOMIC_FCMPSWAP_X2 : 
MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
 defm BUFFER_ATOMIC_FMIN_X2 : MUBU

[clang-tools-extra] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions (PR #68197)

2023-10-06 Thread Stephen Thomas via cfe-commits

https://github.com/stepthomas updated 
https://github.com/llvm/llvm-project/pull/68197

>From ed42bb63a3fe4a840071b74a3f9613dda815aa29 Mon Sep 17 00:00:00 2001
From: Stephen Thomas 
Date: Wed, 4 Oct 2023 10:16:49 +0100
Subject: [PATCH] [AMDGPU] Add encoding/decoding support for
 non-result-returning ATOMIC_CSUB instructions

The BUFFER_ATOMIC_CSUB and GLOBAL_ATOMIC_CSUB instructions have encodings for
non-value-returning forms, although actually using them isn't supported by
hardware. However, these encodings aren't supported by the backend, meaning
that they can't even be assembled or disassembled.

Add support for the non-returning encodings, but gate actually using them
in instruction selection behind a new feature FeatureAtomicCsubNoRtnInsts,
which no target uses. This does allow the non-returning instructions to be
tested manually and llvm.amdgcn.atomic.csub.ll is extended to cover them.
The feature does not gate assembling or disassembling them, this is now
not an error.
---
 llvm/lib/Target/AMDGPU/AMDGPU.td  |  9 
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |  1 +
 llvm/lib/Target/AMDGPU/BUFInstructions.td | 19 +--
 llvm/lib/Target/AMDGPU/FLATInstructions.td| 18 +--
 llvm/lib/Target/AMDGPU/GCNSubtarget.h |  4 ++
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |  1 +
 .../CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll | 51 ---
 llvm/test/MC/AMDGPU/gfx1030_err.s |  6 ---
 8 files changed, 85 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index bf5a7b0a96977c7..9ec062d45ba9f48 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -693,6 +693,13 @@ def FeatureAtomicGlobalPkAddBF16Inst : 
SubtargetFeature<"atomic-global-pk-add-bf
  [FeatureFlatGlobalInsts]
 >;
 
+def FeatureAtomicCsubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts",
+  "HasAtomicCsubNoRtnInsts",
+  "true",
+  "Has buffer_atomic_csub and global_atomic_csub instructions that don't "
+  "return original value"
+>;
+
 def FeatureFlatAtomicFaddF32Inst
   : SubtargetFeature<"flat-atomic-fadd-f32-inst",
   "HasFlatAtomicFaddF32Inst",
@@ -1927,6 +1934,8 @@ def HasGWS : Predicate<"Subtarget->hasGWS()">;
 def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
 def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
 
+def HasAtomicCsubNoRtnInsts : 
Predicate<"Subtarget->hasAtomicCsubNoRtnInsts()">;
+
 // Include AMDGPU TD files
 include "SISchedule.td"
 include "GCNProcessors.td"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 12ccfd29f26c030..81fc28d293021ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -621,6 +621,7 @@ defm int_amdgcn_flat_atomic_fadd : 
global_addr_space_atomic_op;
 defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
 defm int_amdgcn_global_atomic_fmin : noret_op;
 defm int_amdgcn_global_atomic_fmax : noret_op;
+defm int_amdgcn_global_atomic_csub : noret_op;
 defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
 defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
 
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td 
b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index cec35d1147bb0ae..3e71141431d7dc9 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1022,10 +1022,16 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
   "buffer_atomic_dec_x2", VReg_64, i64
 >;
 
-let SubtargetPredicate = HasGFX10_BEncoding in
-defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
-  "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
->;
+let SubtargetPredicate = HasGFX10_BEncoding in {
+  defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
+"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
+  >;
+
+  let OtherPredicates = [HasAtomicCsubNoRtnInsts] in
+defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_NO_RTN <
+  "buffer_atomic_csub", VGPR_32, i32
+>;
+}
 
 let SubtargetPredicate = isGFX8GFX9 in {
 def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
@@ -1561,6 +1567,9 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, 
"BUFFER_ATOMIC_XOR_X2">;
 defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">;
 defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;
 
+let SubtargetPredicate = HasAtomicCsubNoRtnInsts in
+defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", 
["noret"]>;
+
 let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
@@ -2491,7 +2500,7 @@ defm BUFFER_ATOMIC_FCMPSWAP_X2 : 
MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
 defm BUFFER_ATOMIC_FMIN_X2 : MUBU

[libunwind] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions (PR #68197)

2023-10-06 Thread Stephen Thomas via cfe-commits

https://github.com/stepthomas updated 
https://github.com/llvm/llvm-project/pull/68197

>From ed42bb63a3fe4a840071b74a3f9613dda815aa29 Mon Sep 17 00:00:00 2001
From: Stephen Thomas 
Date: Wed, 4 Oct 2023 10:16:49 +0100
Subject: [PATCH] [AMDGPU] Add encoding/decoding support for
 non-result-returning ATOMIC_CSUB instructions

The BUFFER_ATOMIC_CSUB and GLOBAL_ATOMIC_CSUB instructions have encodings for
non-value-returning forms, although actually using them isn't supported by
hardware. However, these encodings aren't supported by the backend, meaning
that they can't even be assembled or disassembled.

Add support for the non-returning encodings, but gate actually using them
in instruction selection behind a new feature FeatureAtomicCsubNoRtnInsts,
which no target uses. This does allow the non-returning instructions to be
tested manually and llvm.amdgcn.atomic.csub.ll is extended to cover them.
The feature does not gate assembling or disassembling them, this is now
not an error.
---
 llvm/lib/Target/AMDGPU/AMDGPU.td  |  9 
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |  1 +
 llvm/lib/Target/AMDGPU/BUFInstructions.td | 19 +--
 llvm/lib/Target/AMDGPU/FLATInstructions.td| 18 +--
 llvm/lib/Target/AMDGPU/GCNSubtarget.h |  4 ++
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |  1 +
 .../CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll | 51 ---
 llvm/test/MC/AMDGPU/gfx1030_err.s |  6 ---
 8 files changed, 85 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index bf5a7b0a96977c7..9ec062d45ba9f48 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -693,6 +693,13 @@ def FeatureAtomicGlobalPkAddBF16Inst : 
SubtargetFeature<"atomic-global-pk-add-bf
  [FeatureFlatGlobalInsts]
 >;
 
+def FeatureAtomicCsubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts",
+  "HasAtomicCsubNoRtnInsts",
+  "true",
+  "Has buffer_atomic_csub and global_atomic_csub instructions that don't "
+  "return original value"
+>;
+
 def FeatureFlatAtomicFaddF32Inst
   : SubtargetFeature<"flat-atomic-fadd-f32-inst",
   "HasFlatAtomicFaddF32Inst",
@@ -1927,6 +1934,8 @@ def HasGWS : Predicate<"Subtarget->hasGWS()">;
 def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
 def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
 
+def HasAtomicCsubNoRtnInsts : 
Predicate<"Subtarget->hasAtomicCsubNoRtnInsts()">;
+
 // Include AMDGPU TD files
 include "SISchedule.td"
 include "GCNProcessors.td"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 12ccfd29f26c030..81fc28d293021ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -621,6 +621,7 @@ defm int_amdgcn_flat_atomic_fadd : 
global_addr_space_atomic_op;
 defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
 defm int_amdgcn_global_atomic_fmin : noret_op;
 defm int_amdgcn_global_atomic_fmax : noret_op;
+defm int_amdgcn_global_atomic_csub : noret_op;
 defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
 defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
 
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td 
b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index cec35d1147bb0ae..3e71141431d7dc9 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1022,10 +1022,16 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
   "buffer_atomic_dec_x2", VReg_64, i64
 >;
 
-let SubtargetPredicate = HasGFX10_BEncoding in
-defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
-  "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
->;
+let SubtargetPredicate = HasGFX10_BEncoding in {
+  defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
+"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
+  >;
+
+  let OtherPredicates = [HasAtomicCsubNoRtnInsts] in
+defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_NO_RTN <
+  "buffer_atomic_csub", VGPR_32, i32
+>;
+}
 
 let SubtargetPredicate = isGFX8GFX9 in {
 def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
@@ -1561,6 +1567,9 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, 
"BUFFER_ATOMIC_XOR_X2">;
 defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">;
 defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;
 
+let SubtargetPredicate = HasAtomicCsubNoRtnInsts in
+defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", 
["noret"]>;
+
 let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
@@ -2491,7 +2500,7 @@ defm BUFFER_ATOMIC_FCMPSWAP_X2 : 
MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
 defm BUFFER_ATOMIC_FMIN_X2 : MUBU

[clang-tools-extra] [AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions (PR #68197)

2023-10-10 Thread Stephen Thomas via cfe-commits

https://github.com/stepthomas closed 
https://github.com/llvm/llvm-project/pull/68197
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits