hliao created this revision. hliao added a reviewer: yaxunl. Herald added a project: clang. Herald added a subscriber: cfe-commits.
- Under different ABIs, the assertion is obviously too strong. Even under the same ABI, when there are unnamed types that are not required to follow the ODR, host- and device-side mangling may still produce different names. As both the host- and device-side compilations always observe the same AST, we can still associate the correct pairs even when the names differ, i.e., we don't use (mangled) names to link host- and device-side globals. There's no need to have this assertion. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D62971 Files: clang/lib/CodeGen/CGCUDANV.cpp clang/test/CodeGenCUDA/unnamed-types.cu Index: clang/test/CodeGenCUDA/unnamed-types.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/unnamed-types.cu @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK: @0 = private unnamed_addr constant [40 x i8] c"_Z2k0IZZ2f1PfENK3$_0clES0_EUlfE_EvS0_T_\00" + +template <typename F> +__global__ void k0(float *p, F f) { + p[0] = f(p[0]); +} + +void f0(float *p) { + [](float *p) { + *p = 1.f; + }(p); +} + +void f1(float *p) { + [](float *p) { + k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; }); + }(p); +} +// CHECK: @__hip_register_globals +// CHECK: __hipRegisterFunction{{.*}}_Z2k0IZZ2f1PfENK3$_1clES0_EUlfE_EvS0_T_{{.*}}@0 Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -217,11 +217,6 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { - assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() || - getDeviceSideName(CGF.CurFuncDecl) + ".stub" == CGF.CurFn->getName() || - CGF.CGM.getContext().getTargetInfo().getCXXABI() != - CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI()); - 
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), CudaFeature::CUDA_USES_NEW_LAUNCH))
Index: clang/test/CodeGenCUDA/unnamed-types.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/unnamed-types.cu @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK: @0 = private unnamed_addr constant [40 x i8] c"_Z2k0IZZ2f1PfENK3$_0clES0_EUlfE_EvS0_T_\00" + +template <typename F> +__global__ void k0(float *p, F f) { + p[0] = f(p[0]); +} + +void f0(float *p) { + [](float *p) { + *p = 1.f; + }(p); +} + +void f1(float *p) { + [](float *p) { + k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; }); + }(p); +} +// CHECK: @__hip_register_globals +// CHECK: __hipRegisterFunction{{.*}}_Z2k0IZZ2f1PfENK3$_1clES0_EUlfE_EvS0_T_{{.*}}@0 Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -217,11 +217,6 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { - assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() || - getDeviceSideName(CGF.CurFuncDecl) + ".stub" == CGF.CurFn->getName() || - CGF.CGM.getContext().getTargetInfo().getCXXABI() != - CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI()); - EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), CudaFeature::CUDA_USES_NEW_LAUNCH))
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits