[clang] a3becb3 - [clang][AMDGPU] Temporarily disable clang atomic fadd test for gfx90a

2022-09-23 Thread Petar Avramovic via cfe-commits

Author: Petar Avramovic
Date: 2022-09-23T21:49:16+02:00
New Revision: a3becb333d7faae695e18728e9b8fa3a3579a240

URL: 
https://github.com/llvm/llvm-project/commit/a3becb333d7faae695e18728e9b8fa3a3579a240
DIFF: 
https://github.com/llvm/llvm-project/commit/a3becb333d7faae695e18728e9b8fa3a3579a240.diff

LOG: [clang][AMDGPU] Temporarily disable clang atomic fadd test for gfx90a

Test is broken by D130579. Temporarily disable to silence buildbot failures.

Added: 


Modified: 
clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl

Removed: 




diff  --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
index f078f4e93bb3d..467745948215d 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
@@ -1,9 +1,6 @@
 // RUN: %clang_cc1 -no-opaque-pointers -O0 -cl-std=CL2.0 -triple 
amdgcn-amd-amdhsa -target-cpu gfx90a \
 // RUN:   %s -S -emit-llvm -o - | FileCheck %s -check-prefix=CHECK
 
-// RUN: %clang_cc1 -no-opaque-pointers -O0 -cl-std=CL2.0 -triple 
amdgcn-amd-amdhsa -target-cpu gfx90a \
-// RUN:   -S -o - %s | FileCheck -check-prefix=GFX90A %s
-
 // REQUIRES: amdgpu-registered-target
 
 typedef half __attribute__((ext_vector_type(2))) half2;



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] dcc756d - [AMDGPU] Pattern for flat atomic fadd f64 intrinsic with local addr

2022-09-25 Thread Petar Avramovic via cfe-commits

Author: Petar Avramovic
Date: 2022-09-25T13:25:41+02:00
New Revision: dcc756d03e597d6a405493a630f66ce4fcb7d656

URL: 
https://github.com/llvm/llvm-project/commit/dcc756d03e597d6a405493a630f66ce4fcb7d656
DIFF: 
https://github.com/llvm/llvm-project/commit/dcc756d03e597d6a405493a630f66ce4fcb7d656.diff

LOG: [AMDGPU] Pattern for flat atomic fadd f64 intrinsic with local addr

Fix the regression in test_flat_add_local_f64 of the clang OpenCL test
builtins-fp-atomics-gfx90a.cl, caused by D130579.
Revert a3becb333d7faae695e18728e9b8fa3a3579a240, which temporarily disabled that test.

Differential Revision: https://reviews.llvm.org/D134568

Added: 


Modified: 
clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/lib/Target/AMDGPU/DSInstructions.td
llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll

Removed: 




diff  --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
index 467745948215d..f078f4e93bb3d 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 -no-opaque-pointers -O0 -cl-std=CL2.0 -triple 
amdgcn-amd-amdhsa -target-cpu gfx90a \
 // RUN:   %s -S -emit-llvm -o - | FileCheck %s -check-prefix=CHECK
 
+// RUN: %clang_cc1 -no-opaque-pointers -O0 -cl-std=CL2.0 -triple 
amdgcn-amd-amdhsa -target-cpu gfx90a \
+// RUN:   -S -o - %s | FileCheck -check-prefix=GFX90A %s
+
 // REQUIRES: amdgpu-registered-target
 
 typedef half __attribute__((ext_vector_type(2))) half2;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 047583077687d..52551c80526b8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -580,6 +580,22 @@ multiclass flat_addr_space_atomic_op {
 }
 }
 
+multiclass local_addr_space_atomic_op {
+  def "_noret_local_addrspace" :
+PatFrag<(ops node:$ptr, node:$data),
+(!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
+  let HasNoUse = true;
+  let AddressSpaces = LoadAddress_local.AddrSpaces;
+  let IsAtomic = 1;
+}
+def "_local_addrspace" :
+PatFrag<(ops node:$ptr, node:$data),
+(!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
+  let AddressSpaces = LoadAddress_local.AddrSpaces;
+  let IsAtomic = 1;
+}
+}
+
 defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op;
 defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
 defm int_amdgcn_flat_atomic_fmin : noret_op;
@@ -589,6 +605,7 @@ defm int_amdgcn_flat_atomic_fadd : 
global_addr_space_atomic_op;
 defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
 defm int_amdgcn_global_atomic_fmin : noret_op;
 defm int_amdgcn_global_atomic_fmax : noret_op;
+defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
 defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
 
 multiclass noret_binary_atomic_op {

diff  --git a/llvm/lib/Target/AMDGPU/DSInstructions.td 
b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 292e85f9b11a5..9ff9c8ab110b7 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1121,6 +1121,16 @@ let SubtargetPredicate = isGFX90APlus in {
 def : DSAtomicRetPat;
 let AddedComplexity = 1 in
 def : DSAtomicRetPat;
+
+class DSAtomicRetPatIntrinsic : GCNPat <
+  (vt (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value)),
+  (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))> {
+}
+
+def : DSAtomicRetPatIntrinsic;
+let AddedComplexity = 1 in
+def : DSAtomicRetPatIntrinsic;
 }
 
 let SubtargetPredicate = isGFX940Plus in {

diff  --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll 
b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
index 5dd73ebedf969..f5104eba35ec2 100644
--- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
@@ -12,6 +12,7 @@ declare double 
@llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)
 declare double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double 
addrspace(1)* %ptr, double %data)
 declare double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double 
addrspace(1)* %ptr, double %data)
 declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, 
double %data)
+declare double @llvm.amdgcn.flat.atomic.fadd.f64.p3f64.f64(double 
addrspace(3)* %ptr, double %data)
 declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, 
double %data)
 declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, 
double %data)
 declare double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* nocapture, 
double, i32, i32, i1)
@@ -952,6 +953,35 @@ main_body:
   ret double %ret
 }
 
+define amdgpu_kernel void 
@local_atomic_fadd_f64_noret_from_flat_intrinsic(double addrspace(3)* %ptr, 
double %data) {
+; GFX90A-LABEL: local_atomic_fadd_f64