yaxunl updated this revision to Diff 139359. yaxunl added a comment. Uploaded the diff with full context.
https://reviews.llvm.org/D44747

Files:
  lib/CodeGen/CodeGenModule.cpp
  test/CodeGenCUDA/kernel-amdgcn.cu

Index: test/CodeGenCUDA/kernel-amdgcn.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/kernel-amdgcn.cu
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+
+// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv()
+class A {
+public:
+  static __global__ void kernel(){}
+};
+
+// CHECK: define void @_Z10non_kernelv()
+__device__ void non_kernel(){}
+
+// CHECK: define amdgpu_kernel void @_Z6kerneli(i32 %x)
+__global__ void kernel(int x) {
+  non_kernel();
+}
+
+// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_()
+template<class T>
+__global__ void template_kernel(T x) {}
+
+void launch(void *f);
+
+int main() {
+  launch((void*)A::kernel);
+  launch((void*)kernel);
+  launch((void*)template_kernel<A>);
+  return 0;
+}
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -3590,6 +3590,9 @@
   MaybeHandleStaticInExternC(D, Fn);
+  if ((getTriple().getArch() == llvm::Triple::amdgcn) &&
+      D->hasAttr<CUDAGlobalAttr>())
+    Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
   maybeSetTrivialComdat(*D, *Fn);
   CodeGenFunction(*this).GenerateCode(D, Fn, FI);
Index: test/CodeGenCUDA/kernel-amdgcn.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/kernel-amdgcn.cu
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+
+// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv()
+class A {
+public:
+  static __global__ void kernel(){}
+};
+
+// CHECK: define void @_Z10non_kernelv()
+__device__ void non_kernel(){}
+
+// CHECK: define amdgpu_kernel void @_Z6kerneli(i32 %x)
+__global__ void kernel(int x) {
+  non_kernel();
+}
+
+// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_()
+template<class T>
+__global__ void template_kernel(T x) {}
+
+void launch(void *f);
+
+int main() {
+  launch((void*)A::kernel);
+  launch((void*)kernel);
+  launch((void*)template_kernel<A>);
+  return 0;
+}
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -3590,6 +3590,9 @@
   MaybeHandleStaticInExternC(D, Fn);
+  if ((getTriple().getArch() == llvm::Triple::amdgcn) &&
+      D->hasAttr<CUDAGlobalAttr>())
+    Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
   maybeSetTrivialComdat(*D, *Fn);
   CodeGenFunction(*this).GenerateCode(D, Fn, FI);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits