[PATCH] D82087: AMDGPU/clang: Add builtins for llvm.amdgcn.ballot

Matt Arsenault via Phabricator via cfe-commits Thu, 18 Jun 2020 05:58:05 -0700

arsenm created this revision.
arsenm added reviewers: yaxunl, rampitec, b-sumner, foad, nhaehnle.
Herald added subscribers: kerbowa, t-tye, tpr, dstuttard, wdng, jvesely, 
kzhuravl.


I wasn't sure what the best strategy was for the wave size
difference. I went for an explicit, enforced builtin for each. The
other option would be to just assume wave64, and IRGen the different
mangling + zext. I didn't see an obvious way to check the wave size
where builtins are emitted, and it might be beneficial to force you to
acknowledge the wave size difference? Or it might be an unnecessary
complication.

The behavior is also slightly odd when directly specifying
-target-feature to cc1 for +/- the size (since we have both positive
and negative forms of both sizes), but this is probably unimportant.

We're also still missing a predefined macro for the wave size, which
we probably need.


https://reviews.llvm.org/D82087

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGenOpenCL/amdgpu-features.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
  clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
  clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl

Index: clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl
===================================================================
--- /dev/null
+++ clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize32 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize64 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify -S -o - %s
+
+typedef unsigned long ulong;
+
+void test_ballot_wave64(global ulong* out, int a, int b) {
+  *out = __builtin_amdgcn_ballot_wave64(a == b);  // expected-error {{'__builtin_amdgcn_ballot_wave64' needs target feature wavefrontsize64}}
+}
Index: clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
===================================================================
--- /dev/null
+++ clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize64 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize32 -verify -S -o - %s
+
+typedef unsigned int uint;
+
+void test_ballot_wave32(global uint* out, int a, int b) {
+  *out = __builtin_amdgcn_ballot_wave32(a == b);  // expected-error {{'__builtin_amdgcn_ballot_wave32' needs target feature wavefrontsize32}}
+}
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// XUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature -wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+
+typedef unsigned long ulong;
+
+// CHECK-LABEL: @test_ballot_wave64(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
+void test_ballot_wave64(global ulong* out, int a, int b)
+{
+  *out = __builtin_amdgcn_ballot_wave64(a == b);
+}
+
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+
+typedef unsigned int uint;
+
+// CHECK-LABEL: @test_ballot_wave32(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
+void test_ballot_wave32(global uint* out, int a, int b)
+{
+  *out = __builtin_amdgcn_ballot_wave32(a == b);
+}
+
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
@@ -22,3 +22,10 @@
 void test_mov_dpp8(global uint* out, uint a) {
   *out = __builtin_amdgcn_mov_dpp8(a, 1);
 }
+
+// CHECK-LABEL: @test_ballot_wave32(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
+void test_ballot_wave32(global uint* out, int a, int b)
+{
+  *out = __builtin_amdgcn_ballot_wave32(a == b);
+}
Index: clang/test/CodeGenOpenCL/amdgpu-features.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -3,6 +3,10 @@
 // Check that appropriate features are defined for every supported AMDGPU
 // "-target" and "-mcpu" options.
 
+// RUN: %clang_cc1 -triple amdgcn -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s
+// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE32 %s
+// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE64 %s
+
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx700 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX700 %s
@@ -15,16 +19,20 @@
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1030 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1030 %s
 
-// GFX600-NOT: "target-features"
-// GFX601-NOT: "target-features"
-// GFX700: "target-features"="+ci-insts,+flat-address-space"
-// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime"
-// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime"
-// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime"
-// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime"
-// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
-// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
-// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
-// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// NOCPU-NOT: "target-features"
+// NOCPU-WAVE32: "target-features"="+wavefrontsize32"
+// NOCPU-WAVE64: "target-features"="+wavefrontsize64"
+
+// GFX600: "target-features"="+wavefrontsize64"
+// GFX601: "target-features"="+wavefrontsize64"
+// GFX700: "target-features"="+ci-insts,+flat-address-space,+wavefrontsize64"
+// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+wavefrontsize64"
+// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+wavefrontsize64"
+// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+wavefrontsize64"
+// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+wavefrontsize64"
+// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+wavefrontsize32"
+// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+wavefrontsize32"
+// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+wavefrontsize32"
+// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+wavefrontsize32"
 
 kernel void test() {}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14517,6 +14517,13 @@
     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
   case AMDGPU::BI__builtin_amdgcn_sbfe:
     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
+  case AMDGPU::BI__builtin_amdgcn_ballot_wave32:
+  case AMDGPU::BI__builtin_amdgcn_ballot_wave64: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
+    return Builder.CreateCall(F, { Src });
+  }
   case AMDGPU::BI__builtin_amdgcn_uicmp:
   case AMDGPU::BI__builtin_amdgcn_uicmpl:
   case AMDGPU::BI__builtin_amdgcn_sicmp:
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -164,6 +164,8 @@
 bool AMDGPUTargetInfo::initFeatureMap(
     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
     const std::vector<std::string> &FeatureVec) const {
+  const bool IsNullCPU = CPU.empty();
+  bool IsWave32Capable = false;
 
   using namespace llvm::AMDGPU;
 
@@ -171,6 +173,7 @@
   if (isAMDGCN(getTriple())) {
     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
     case GK_GFX1030:
+      IsWave32Capable = true;
       Features["ci-insts"] = true;
       Features["dot1-insts"] = true;
       Features["dot2-insts"] = true;
@@ -194,6 +197,7 @@
       Features["dot6-insts"] = true;
       LLVM_FALLTHROUGH;
     case GK_GFX1010:
+      IsWave32Capable = true;
       Features["dl-insts"] = true;
       Features["ci-insts"] = true;
       Features["flat-address-space"] = true;
@@ -276,7 +280,21 @@
     }
   }
 
-  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
+  if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
+    return false;
+
+  // Don't assume any wavesize with an unknown subtarget.
+  if (!IsNullCPU) {
+    // Default to wave32 if available, or wave64 if not
+    if (Features.count("wavefrontsize32") == 0 &&
+        Features.count("wavefrontsize64") == 0) {
+      StringRef DefaultWaveSizeFeature = IsWave32Capable ?
+        "wavefrontsize32" : "wavefrontsize64";
+      Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
+    }
+  }
+
+  return true;
 }
 
 void AMDGPUTargetInfo::fillValidCPUList(
Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -103,12 +103,6 @@
 BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
-BUILTIN(__builtin_amdgcn_uicmp, "LUiUiUiIi", "nc")
-BUILTIN(__builtin_amdgcn_uicmpl, "LUiLUiLUiIi", "nc")
-BUILTIN(__builtin_amdgcn_sicmp, "LUiiiIi", "nc")
-BUILTIN(__builtin_amdgcn_sicmpl, "LUiLiLiIi", "nc")
-BUILTIN(__builtin_amdgcn_fcmp, "LUiddIi", "nc")
-BUILTIN(__builtin_amdgcn_fcmpf, "LUiffIi", "nc")
 BUILTIN(__builtin_amdgcn_ds_swizzle, "iiIi", "nc")
 BUILTIN(__builtin_amdgcn_ds_permute, "iii", "nc")
 BUILTIN(__builtin_amdgcn_ds_bpermute, "iii", "nc")
@@ -138,6 +132,21 @@
 BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "LUiLUiUiLUi", "nc")
 BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiLUiUiV4Ui", "nc")
 
+//===----------------------------------------------------------------------===//
+// Ballot builtins.
+//===----------------------------------------------------------------------===//
+
+TARGET_BUILTIN(__builtin_amdgcn_ballot_wave32, "Uib", "nc", "wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_ballot_wave64, "LUib", "nc", "wavefrontsize64")
+
+// Deprecated intrinsics in favor of __builtin_amdgn_ballot_{wave32|wave64}
+BUILTIN(__builtin_amdgcn_uicmp, "LUiUiUiIi", "nc")
+BUILTIN(__builtin_amdgcn_uicmpl, "LUiLUiLUiIi", "nc")
+BUILTIN(__builtin_amdgcn_sicmp, "LUiiiIi", "nc")
+BUILTIN(__builtin_amdgcn_sicmpl, "LUiLiLiIi", "nc")
+BUILTIN(__builtin_amdgcn_fcmp, "LUiddIi", "nc")
+BUILTIN(__builtin_amdgcn_fcmpf, "LUiffIi", "nc")
+
 //===----------------------------------------------------------------------===//
 // CI+ only builtins.
 //===----------------------------------------------------------------------===//

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D82087: AMDGPU/clang: Add builtins for llvm.amdgcn.ballot

Reply via email to