yaxunl updated this revision to Diff 218261. yaxunl added a comment. Herald added a subscriber: jvesely.
add test

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67048/new/

https://reviews.llvm.org/D67048

Files:
  lib/CodeGen/TargetInfo.cpp
  test/CodeGenCUDA/kernel-amdgcn.cu

Index: test/CodeGenCUDA/kernel-amdgcn.cu
===================================================================
--- test/CodeGenCUDA/kernel-amdgcn.cu
+++ test/CodeGenCUDA/kernel-amdgcn.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
 #include "Inputs/cuda.h"
 // CHECK: define amdgpu_kernel void @_ZN1A6kernelEv
@@ -25,7 +25,7 @@
   EmptyKernelPtr Empty() { return EmptyKernel<void>; }
 };
-// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_
+// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
 template<class T>
 __global__ void template_kernel(T x) {}
@@ -39,3 +39,4 @@
   launch((void*)D.Empty());
   return 0;
 }
+// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -7922,8 +7922,9 @@
   const bool IsOpenCLKernel = M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
-  if ((IsOpenCLKernel ||
-       (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
+  const bool IsHIPKernel = M.getLangOpts().HIP &&
+                           FD->hasAttr<CUDAGlobalAttr>();
+  if ((IsOpenCLKernel || IsHIPKernel) &&
       (M.getTriple().getOS() == llvm::Triple::AMDHSA))
     F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
@@ -7949,7 +7950,7 @@
       F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
     } else
       assert(Max == 0 && "Max must be zero");
-  } else if (IsOpenCLKernel) {
+  } else if (IsOpenCLKernel || IsHIPKernel) {
     // By default, restrict the maximum size to 256.
     F->addFnAttr("amdgpu-flat-work-group-size", "1,256");
   }
Index: test/CodeGenCUDA/kernel-amdgcn.cu
===================================================================
--- test/CodeGenCUDA/kernel-amdgcn.cu
+++ test/CodeGenCUDA/kernel-amdgcn.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
 #include "Inputs/cuda.h"
 // CHECK: define amdgpu_kernel void @_ZN1A6kernelEv
@@ -25,7 +25,7 @@
   EmptyKernelPtr Empty() { return EmptyKernel<void>; }
 };
-// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_
+// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
 template<class T>
 __global__ void template_kernel(T x) {}
@@ -39,3 +39,4 @@
   launch((void*)D.Empty());
   return 0;
 }
+// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -7922,8 +7922,9 @@
   const bool IsOpenCLKernel = M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
-  if ((IsOpenCLKernel ||
-       (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
+  const bool IsHIPKernel = M.getLangOpts().HIP &&
+                           FD->hasAttr<CUDAGlobalAttr>();
+  if ((IsOpenCLKernel || IsHIPKernel) &&
       (M.getTriple().getOS() == llvm::Triple::AMDHSA))
     F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
@@ -7949,7 +7950,7 @@
       F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
     } else
       assert(Max == 0 && "Max must be zero");
-  } else if (IsOpenCLKernel) {
+  } else if (IsOpenCLKernel || IsHIPKernel) {
     // By default, restrict the maximum size to 256.
     F->addFnAttr("amdgpu-flat-work-group-size", "1,256");
   }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits