[PATCH] D148796: [AMDGPU][GFX908] Add builtin support for global add atomic f16/f32

Mariusz Sikora via Phabricator via cfe-commits Thu, 20 Apr 2023 05:39:15 -0700

mariusz-sikora-at-amd created this revision.
Herald added subscribers: kosarev, kerbowa, hiraditya, tpr, dstuttard, yaxunl, 
jvesely, kzhuravl.
Herald added a project: All.
mariusz-sikora-at-amd requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, wdng.
Herald added projects: clang, LLVM.


The f16/f32 global atomic add instructions are supported on many targets
(such as gfx908, gfx90a, ...), but only on gfx908 do these instructions not
return the old value from memory. Because of this difference, they were
left out for gfx908 when builtin support was added.

This change extends the existing builtins to support gfx908.
By default the builtins return the v2f16 or float type, but if the target
is gfx908 then clang Sema overrides the return type to void. This then
produces errors if the user expects to receive a return value from the
builtins.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D148796

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGenOpenCL/amdgpu-features.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f16-gfx9-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f32-gfx9-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-noret-err.cl
  clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx908.cl
  llvm/lib/TargetParser/TargetParser.cpp

Index: llvm/lib/TargetParser/TargetParser.cpp
===================================================================
--- llvm/lib/TargetParser/TargetParser.cpp
+++ llvm/lib/TargetParser/TargetParser.cpp
@@ -340,6 +340,7 @@
       Features["dot5-insts"] = true;
       Features["dot6-insts"] = true;
       Features["mai-insts"] = true;
+      Features["atomic-fadd-no-rtn-insts"] = true;
       [[fallthrough]];
     case GK_GFX906:
       Features["dl-insts"] = true;
Index: clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx908.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx908.cl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \
+// RUN:   %s -S -emit-llvm -o - | FileCheck %s -check-prefix=CHECK
+
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \
+// RUN:   -S -o - %s | FileCheck -check-prefix=GFX908 %s
+
+// REQUIRES: amdgpu-registered-target
+
+typedef half __attribute__((ext_vector_type(2))) half2;
+
+// CHECK-LABEL: test_global_add_half2
+// CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %0, <2 x half> %1)
+// GFX908-LABEL:  test_global_add_half2
+// GFX908:  global_atomic_pk_add_f16 v[0:1], v2, off
+half2 test_global_add_half2(__global half2 *addr, half2 x) {
+  __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x);
+  return *addr;
+}
+
+// CHECK-LABEL: test_global_add_float
+// CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %0, float %1)
+// GFX908-LABEL:  test_global_add_float
+// GFX908:  global_atomic_add_f32 v[0:1], v2, off
+float test_global_add_float(__global float *addr, float x) {
+  __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
+  return *addr;
+}
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-noret-err.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-noret-err.cl
@@ -0,0 +1,40 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \
+// RUN:   -verify -S -o - %s
+
+typedef half __attribute__((ext_vector_type(2))) half2;
+
+half2 func1(half2 x); // expected-note{{passing argument to parameter 'x' here}}
+float func2(float x); // expected-note{{passing argument to parameter 'x' here}}
+
+half2 test_global_fadd_v2f16(__global half2 *addrh2, half2 xh2) {
+  half2 *rtn1;
+
+  half2 *rtn2 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{initializing '__generic half2 *__private' with an expression of incompatible type 'void'}}
+  *rtn1 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{assigning to '__generic half2' (vector of 2 'half' values) from incompatible type 'void'}}
+  *rtn1 = *rtn1 + __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{cannot convert between vector and non-scalar values ('half2' (vector of 2 'half' values) and 'void')}}
+  func1(__builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2)); // expected-error{{passing 'void' to parameter of incompatible type 'half2' (vector of 2 'half' values)}}
+
+  half2 rtn3;
+  half2 rtn4 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{initializing '__private half2' (vector of 2 'half' values) with an expression of incompatible type 'void'}}
+  rtn3 = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{assigning to '__private half2' (vector of 2 'half' values) from incompatible type 'void'}}
+  rtn3 = rtn3 + __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{cannot convert between vector and non-scalar values ('half2' (vector of 2 'half' values) and 'void')}}
+
+  return __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{returning 'void' from a function with incompatible result type 'half2' (vector of 2 'half' values)}}
+}
+
+float test_global_fadd_f32(__global float *addrf, float xf) {
+  float *rtn1;
+
+  float *rtn2 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{initializing '__generic float *__private' with an expression of incompatible type 'void'}}
+  *rtn1 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{assigning to '__generic float' from incompatible type 'void'}}
+  *rtn1 = *rtn1 + __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{invalid operands to binary expression ('__generic float' and 'void')}}
+  func2(__builtin_amdgcn_global_atomic_fadd_f32(addrf, xf)); // expected-error{{passing 'void' to parameter of incompatible type 'float'}}
+
+  float rtn3;
+  float rtn4 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{initializing '__private float' with an expression of incompatible type 'void'}}
+  rtn3 = __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{assigning to '__private float' from incompatible type 'void'}}
+  rtn3 = rtn3 + __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{invalid operands to binary expression ('__private float' and 'void')}}
+
+  return __builtin_amdgcn_global_atomic_fadd_f32(addrf, xf); // expected-error{{returning 'void' from a function with incompatible result type 'float'}}
+}
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
@@ -2,21 +2,12 @@
 // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \
 // RUN:   -verify -S -o - %s
 
-typedef half __attribute__((ext_vector_type(2))) half2;
-
-void test_global_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2,
-                      __global float *addrf, float xf,
-                      __global double *addr, double x) {
-  half2 *half_rtn;
-  float *fp_rtn;
+void test_global_fadd(__global double *addr, double x) {
   double *rtn;
-  *half_rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}}
-  *fp_rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts}}
   *rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_global_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmax_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_global_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmin_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fmin_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fmax_f64' needs target feature gfx90a-insts}}
-  __builtin_amdgcn_ds_atomic_fadd_v2f16(addrh2l, xh2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2f16' needs target feature atomic-ds-pk-add-16-insts}}
 }
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f32-gfx9-err.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f32-gfx9-err.cl
@@ -0,0 +1,10 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -verify -S -emit-llvm -o - %s
+
+float test_global_fadd_f32(__global float *addr, float x) {
+  float *rtn;
+
+  __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts}}
+  *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts}}
+  return __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts}}
+}
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f16-gfx9-err.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-f16-gfx9-err.cl
@@ -0,0 +1,12 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -verify -S -emit-llvm -o - %s
+
+typedef half __attribute__((ext_vector_type(2))) half2;
+
+half2 test_global_fadd_f16(__global half2 *addrh2, half2 xh2) {
+  half2 *rtn;
+
+  __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts}}
+  *rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts}}
+  return __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts}}
+}
Index: clang/test/CodeGenOpenCL/amdgpu-features.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -70,11 +70,11 @@
 // GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX908: "target-features"="+16-bit-insts,+atomic-fadd-no-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-no-rtn-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-no-rtn-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
 // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
 // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -4477,6 +4477,21 @@
 
 bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
                                           CallExpr *TheCall) {
+
+  if (BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16 ||
+      BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32) {
+    auto TargetID = Context.getTargetInfo().getTargetID();
+    if (!TargetID || TargetID->find("gfx908") == std::string::npos)
+      return false;
+
+    // GFX908/MI100 global atomic f32/f16 are not returning old value from
+    // memory. By overriding return type of the builtin to 'void' we will force
+    // clang to throw error if anyone is expecting to receive return value from
+    // the builtin.
+    TheCall->setType(Context.VoidTy);
+    return false;
+  }
+
   // position of memory order and scope arguments in the builtin
   unsigned OrderIndex, ScopeIndex;
   switch (BuiltinID) {
Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -214,8 +214,8 @@
 TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
 
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "t", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "atomic-buffer-global-pk-add-f16-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts|atomic-fadd-no-rtn-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "atomic-buffer-global-pk-add-f16-insts|atomic-fadd-no-rtn-insts")
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmin_f64, "dd*1d", "t", "gfx90a-insts")
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmax_f64, "dd*1d", "t", "gfx90a-insts")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D148796: [AMDGPU][GFX908] Add builtin support for global add atomic f16/f32

Reply via email to