Author: Matt Arsenault
Date: 2023-01-07T13:39:05-05:00
New Revision: 6fe70cb465654eafafd272231e23762adeab4290

URL: 
https://github.com/llvm/llvm-project/commit/6fe70cb465654eafafd272231e23762adeab4290
DIFF: 
https://github.com/llvm/llvm-project/commit/6fe70cb465654eafafd272231e23762adeab4290.diff

LOG: clang/AMDGPU: Force disable block enqueue arguments for HIP

This is a dirty, dirty hack to workaround bot failures at
-O0. Currently these fields are only used by OpenCL features and
evidently the HIP runtime isn't expecting to see them in HIP
programs. The code objects should be language agnostic, so just force
optimize these out until the runtime is fixed.

Added: 
    clang/test/CodeGenHIP/default-attributes.hip

Modified: 
    clang/lib/CodeGen/TargetInfo.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/TargetInfo.cpp 
b/clang/lib/CodeGen/TargetInfo.cpp
index aec170ae5570..ee8852903eda 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -9520,6 +9520,15 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
     if (NumVGPR != 0)
       F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
   }
+
+  if (IsHIPKernel) {
+    // FIXME: This is a dirty, dirty hack to fix bot failures at -O0 and should
+    // be removed. The HIP runtime currently fails to handle the case where one
+    // of these fields fails to optimize out. The runtime should tolerate all
+    // requested implicit inputs regardless of language.
+    F->addFnAttr("amdgpu-no-default-queue");
+    F->addFnAttr("amdgpu-no-completion-action");
+  }
 }
 
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(

diff  --git a/clang/test/CodeGenHIP/default-attributes.hip 
b/clang/test/CodeGenHIP/default-attributes.hip
new file mode 100644
index 000000000000..b4f4a6201956
--- /dev/null
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --function-signature --check-attributes --check-globals
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device \
+// RUN:    -emit-llvm -o - %s | FileCheck -check-prefix=OPTNONE %s
+
+// RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device \
+// RUN:    -emit-llvm -o - %s | FileCheck -check-prefix=OPT %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+
+// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone
+// OPTNONE-LABEL: define {{[^@]+}}@_Z4funcv
+// OPTNONE-SAME: () #[[ATTR0:[0-9]+]] {
+// OPTNONE-NEXT:  entry:
+// OPTNONE-NEXT:    ret void
+//
+// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind 
willreturn memory(none)
+// OPT-LABEL: define {{[^@]+}}@_Z4funcv
+// OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// OPT-NEXT:  entry:
+// OPT-NEXT:    ret void
+//
+__device__ void func() {
+
+}
+
+// OPTNONE: Function Attrs: convergent mustprogress noinline norecurse 
nounwind optnone
+// OPTNONE-LABEL: define {{[^@]+}}@_Z6kernelv
+// OPTNONE-SAME: () #[[ATTR1:[0-9]+]] {
+// OPTNONE-NEXT:  entry:
+// OPTNONE-NEXT:    ret void
+//
+// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind 
willreturn memory(none)
+// OPT-LABEL: define {{[^@]+}}@_Z6kernelv
+// OPT-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// OPT-NEXT:  entry:
+// OPT-NEXT:    ret void
+//
+__global__ void kernel() {
+
+}
+//.
+// OPTNONE: attributes #0 = { convergent mustprogress noinline nounwind 
optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse 
nounwind optnone "amdgpu-flat-work-group-size"="1,1024" 
"amdgpu-no-completion-action" "amdgpu-no-default-queue" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"uniform-work-group-size"="true" }
+//.
+// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind 
willreturn memory(none) "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" }
+// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind 
willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" 
"amdgpu-no-completion-action" "amdgpu-no-default-queue" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"uniform-work-group-size"="true" }


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to