Author: Matt Arsenault Date: 2023-01-07T13:39:05-05:00 New Revision: 6fe70cb465654eafafd272231e23762adeab4290
URL: https://github.com/llvm/llvm-project/commit/6fe70cb465654eafafd272231e23762adeab4290 DIFF: https://github.com/llvm/llvm-project/commit/6fe70cb465654eafafd272231e23762adeab4290.diff LOG: clang/AMDGPU: Force disable block enqueue arguments for HIP This is a dirty, dirty hack to workaround bot failures at -O0. Currently these fields are only used by OpenCL features and evidently the HIP runtime isn't expecting to see them in HIP programs. The code objects should be language agnostic, so just force optimize these out until the runtime is fixed. Added: clang/test/CodeGenHIP/default-attributes.hip Modified: clang/lib/CodeGen/TargetInfo.cpp Removed: ################################################################################ diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index aec170ae5570..ee8852903eda 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9520,6 +9520,15 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } + + if (IsHIPKernel) { + // FIXME: This is a dirty, dirty hack to fix bot failures at -O0 and should + // be removed. The HIP runtime currently fails to handle the case where one + // of these fields fails to optimize out. The runtime should tolerate all + // requested implicit inputs regardless of language. + F->addFnAttr("amdgpu-no-default-queue"); + F->addFnAttr("amdgpu-no-completion-action"); + } } void AMDGPUTargetCodeGenInfo::setTargetAttributes( diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip new file mode 100644 index 000000000000..b4f4a6201956 --- /dev/null +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -0,0 +1,47 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPTNONE %s + +// RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPT %s + +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) + +// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone +// OPTNONE-LABEL: define {{[^@]+}}@_Z4funcv +// OPTNONE-SAME: () #[[ATTR0:[0-9]+]] { +// OPTNONE-NEXT: entry: +// OPTNONE-NEXT: ret void +// +// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +// OPT-LABEL: define {{[^@]+}}@_Z4funcv +// OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { +// OPT-NEXT: entry: +// OPT-NEXT: ret void +// +__device__ void func() { + +} + +// OPTNONE: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// OPTNONE-LABEL: define {{[^@]+}}@_Z6kernelv +// OPTNONE-SAME: () #[[ATTR1:[0-9]+]] { +// OPTNONE-NEXT: entry: +// OPTNONE-NEXT: ret void +// +// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +// OPT-LABEL: define {{[^@]+}}@_Z6kernelv +// OPT-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] { +// OPT-NEXT: entry: +// OPT-NEXT: ret void +// +__global__ void kernel() { + +} +//. +// OPTNONE: attributes #0 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +//. +// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits