[PATCH] D134568: [AMDGPU] Add pattern for flat fadd f64 intrinsic with local address

2022-09-25 Thread Petar Avramovic via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGdcc756d03e59: [AMDGPU] Pattern for flat atomic fadd f64 
intrinsic with local addr (authored by Petar.Avramovic).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Changed prior to commit:
  https://reviews.llvm.org/D134568?vs=462593&id=462719#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D134568/new/

https://reviews.llvm.org/D134568

Files:
  clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
  llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
  llvm/lib/Target/AMDGPU/DSInstructions.td
  llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll

Index: llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
===
--- llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
+++ llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
@@ -12,6 +12,7 @@
 declare double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
 declare double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
 declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data)
+declare double @llvm.amdgcn.flat.atomic.fadd.f64.p3f64.f64(double addrspace(3)* %ptr, double %data)
 declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data)
 declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data)
 declare double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* nocapture, double, i32, i32, i1)
@@ -952,6 +953,35 @@
   ret double %ret
 }
 
+define amdgpu_kernel void @local_atomic_fadd_f64_noret_from_flat_intrinsic(double addrspace(3)* %ptr, double %data) {
+; GFX90A-LABEL: local_atomic_fadd_f64_noret_from_flat_intrinsic:
+; GFX90A:   ; %bb.0: ; %main_body
+; GFX90A-NEXT:s_load_dword s4, s[0:1], 0x24
+; GFX90A-NEXT:s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX90A-NEXT:s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:v_mov_b32_e32 v2, s4
+; GFX90A-NEXT:v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90A-NEXT:ds_add_f64 v2, v[0:1]
+; GFX90A-NEXT:s_endpgm
+main_body:
+  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p3f64.f64(double addrspace(3)* %ptr, double %data)
+  ret void
+}
+
+define double @local_atomic_fadd_f64_rtn_from_flat_intrinsic(double addrspace(3)* %ptr, double %data) {
+; GFX90A-LABEL: local_atomic_fadd_f64_rtn_from_flat_intrinsic:
+; GFX90A:   ; %bb.0: ; %main_body
+; GFX90A-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:v_mov_b32_e32 v3, v2
+; GFX90A-NEXT:v_mov_b32_e32 v2, v1
+; GFX90A-NEXT:ds_add_rtn_f64 v[0:1], v0, v[2:3]
+; GFX90A-NEXT:s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:s_setpc_b64 s[30:31]
+main_body:
+  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p3f64.f64(double addrspace(3)* %ptr, double %data)
+  ret double %ret
+}
+
 define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* %ptr) #1 {
 ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat:
 ; GFX90A:   ; %bb.0: ; %main_body
@@ -994,7 +1024,7 @@
 ; GFX90A-NEXT:s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:v_mov_b32_e32 v0, s2
 ; GFX90A-NEXT:ds_read_b64 v[0:1], v0
-; GFX90A-NEXT:  .LBB52_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB54_1: ; %atomicrmw.start
 ; GFX90A-NEXT:; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:v_add_f64 v[2:3], v[0:1], 4.0
@@ -1006,7 +1036,7 @@
 ; GFX90A-NEXT:s_or_b64 s[0:1], vcc, s[0:1]
 ; GFX90A-NEXT:v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[0,1]
 ; GFX90A-NEXT:s_andn2_b64 exec, exec, s[0:1]
-; GFX90A-NEXT:s_cbranch_execnz .LBB52_1
+; GFX90A-NEXT:s_cbranch_execnz .LBB54_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:s_endpgm
 main_body:
Index: llvm/lib/Target/AMDGPU/DSInstructions.td
===
--- llvm/lib/Target/AMDGPU/DSInstructions.td
+++ llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1121,6 +1121,16 @@
 def : DSAtomicRetPat;
 let AddedComplexity = 1 in
 def : DSAtomicRetPat;
+
+class DSAtomicRetPatIntrinsic<DS_Pseudo inst, ValueType vt, PatFrag frag,
+  bit gds=0> : GCNPat <
+  (vt (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value)),
+  (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))> {
+}
+
+def : DSAtomicRetPatIntrinsic;
+let AddedComplexity = 1 in
+def : DSAtomicRetPatIntrinsic;
 }
 
 let SubtargetPredicate = isGFX940Plus in {
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
===
--- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -580,6 +580,22 @@
 }
 }
 
+multiclass local_addr_space_atomic_op {
+  def "_noret_local_addrspace" :
+PatFrag<(ops node:$ptr, node:$data),
+(!cast<Intrinsic>(NAME) node:$ptr, node:$data)>{
+  

[PATCH] D70808: [mips] Check that features required by built-ins are enabled

2019-11-28 Thread Petar Avramovic via Phabricator via cfe-commits
Petar.Avramovic accepted this revision.
Petar.Avramovic added a comment.
This revision is now accepted and ready to land.

LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70808/new/

https://reviews.llvm.org/D70808



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits