yaxunl created this revision. yaxunl added a reviewer: tra. yaxunl requested review of this revision.
Currently Visual Studio 2019 has a linker issue which causes linking error when a template kernel is instantiated in different compilation units. On the other hand, it is unnecessary to prefix kernel stub for MSVC target since the host and device compilation uses different mangling ABI. This patch let clang not emit kernel handle for MSVC target to work around the linker issue. https://reviews.llvm.org/D112492 Files: clang/include/clang/AST/GlobalDecl.h clang/lib/CodeGen/CGCUDANV.cpp clang/lib/CodeGen/CGExpr.cpp clang/test/CodeGenCUDA/kernel-stub-name.cu
Index: clang/test/CodeGenCUDA/kernel-stub-name.cu =================================================================== --- clang/test/CodeGenCUDA/kernel-stub-name.cu +++ clang/test/CodeGenCUDA/kernel-stub-name.cu @@ -2,16 +2,35 @@ // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fcuda-include-gpubinary %t -o - -x hip\ -// RUN: | FileCheck %s +// RUN: | FileCheck -check-prefixes=CHECK,GNU %s + +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ +// RUN: -fcuda-include-gpubinary %t -o - -x hip\ +// RUN: | FileCheck -check-prefix=GNUNEG %s + +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \ +// RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \ +// RUN: %t -o - -x hip\ +// RUN: | FileCheck -check-prefixes=CHECK,MSVC %s + +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -emit-llvm %s \ +// RUN: -aux-triple amdgcn-amd-amdhsa -fcuda-include-gpubinary \ +// RUN: %t -o - -x hip\ +// RUN: | FileCheck -check-prefix=MSVCNEG %s #include "Inputs/cuda.h" -// Kernel handles +// Check kernel handles are emitted for non-MSVC target but not for MSVC target. + +// GNU: @[[HCKERN:ckernel]] = constant void ()* @[[CSTUB:__device_stub__ckernel]], align 8 +// GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8 +// GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], align 8 +// GNU: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8 -// CHECK: @[[HCKERN:ckernel]] = constant void ()* @__device_stub__ckernel, align 8 -// CHECK: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant void ()* @_ZN2ns23__device_stub__nskernelEv, align 8 -// CHECK: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant void ()* @_Z25__device_stub__kernelfuncIiEvv, align 8 -// CHECK: @[[HDKERN:_Z11kernel_declv]] = external constant void ()*, align 8 +// MSVCNEG-NOT: @ckernel = +// MSVCNEG-NOT: @{{"\?nskernel@ns@@YAXXZ.*"}} = +// MSVCNEG-NOT: @{{"\?\?\$kernelfunc@H@@YAXXZ.*"}} = +// MSVCNEG-NOT: @{{"\?kernel_decl@@YAXXZ.*"}} = extern "C" __global__ void ckernel() {} @@ -24,10 +43,10 @@ __global__ void kernel_decl(); -void (*kernel_ptr)(); -void *void_ptr; +extern "C" void (*kernel_ptr)(); +extern "C" void *void_ptr; -void launch(void *kern); +extern "C" void launch(void *kern); // Device side kernel names @@ -37,21 +56,27 @@ // Non-template kernel stub functions -// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]] -// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]] -// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]] -// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]] +// GNU: define{{.*}}@[[CSTUB]] +// GNU: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]] +// MSVC: define{{.*}}@[[CSTUB:ckernel]] +// MSVC: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]] +// GNU: define{{.*}}@[[NSSTUB]] +// GNU: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]] +// MSVC: define{{.*}}@[[NSSTUB:"\?nskernel@ns@@YAXXZ"]] +// MSVC: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]] -// Check kernel stub is used for triple chevron +// Check kernel stub is called for triple chevron. -// CHECK-LABEL: define{{.*}}@_Z4fun1v() +// CHECK-LABEL: define{{.*}}@fun1() // CHECK: call void @[[CSTUB]]() // CHECK: call void @[[NSSTUB]]() -// CHECK: call void @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]]() -// CHECK: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]() +// GNU: call void @[[TSTUB]]() +// GNU: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]() +// MSVC: call void @[[TSTUB:"\?\?\$kernelfunc@H@@YAXXZ"]]() +// MSVC: call void @[[DSTUB:"\?kernel_decl@@YAXXZ"]]() -void fun1(void) { +extern "C" void fun1(void) { ckernel<<<1, 1>>>(); ns::nskernel<<<1, 1>>>(); kernelfunc<int><<<1, 1>>>(); @@ -61,34 +86,45 @@ // Template kernel stub functions // CHECK: define{{.*}}@[[TSTUB]] -// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]] +// GNU: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]] +// MSVC: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]] // Check declaration of stub function for external kernel. // CHECK: declare{{.*}}@[[DSTUB]] // Check kernel handle is used for passing the kernel as a function pointer - -// CHECK-LABEL: define{{.*}}@_Z4fun2v() -// CHECK: call void @_Z6launchPv({{.*}}[[HCKERN]] -// CHECK: call void @_Z6launchPv({{.*}}[[HNSKERN]] -// CHECK: call void @_Z6launchPv({{.*}}[[HTKERN]] -// CHECK: call void @_Z6launchPv({{.*}}[[HDKERN]] -void fun2() { +// for non-MSVC target but kernel stub is used for MSVC target. + +// CHECK-LABEL: define{{.*}}@fun2() +// GNU: call void @launch({{.*}}[[HCKERN]] +// GNU: call void @launch({{.*}}[[HNSKERN]] +// GNU: call void @launch({{.*}}[[HTKERN]] +// GNU: call void @launch({{.*}}[[HDKERN]] +// MSVC: call void @launch({{.*}}[[CSTUB]] +// MSVC: call void @launch({{.*}}[[NSSTUB]] +// MSVC: call void @launch({{.*}}[[TSTUB]] +// MSVC: call void @launch({{.*}}[[DSTUB]] +extern "C" void fun2() { launch((void *)ckernel); launch((void *)ns::nskernel); launch((void *)kernelfunc<int>); launch((void *)kernel_decl); } -// Check kernel handle is used for assigning a kernel to a function pointer - -// CHECK-LABEL: define{{.*}}@_Z4fun3v() -// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8 -// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8 -// CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8 -// CHECK: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8 -void fun3() { +// Check kernel handle is used for assigning a kernel to a function pointer for +// non-MSVC target but kernel stub is used for MSVC target. + +// CHECK-LABEL: define{{.*}}@fun3() +// GNU: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8 +// GNU: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr, align 8 +// GNU: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8 +// GNU: store i8* bitcast (void ()** @[[HCKERN]] to i8*), i8** @void_ptr, align 8 +// MSVC: store void ()* @[[CSTUB]], void ()** @kernel_ptr, align 8 +// MSVC: store void ()* @[[CSTUB]], void ()** @kernel_ptr, align 8 +// MSVC: store i8* bitcast (void ()* @[[CSTUB]] to i8*), i8** @void_ptr, align 8 +// MSVC: store i8* bitcast (void ()* @[[CSTUB]] to i8*), i8** @void_ptr, align 8 +extern "C" void fun3() { kernel_ptr = ckernel; kernel_ptr = &ckernel; void_ptr = (void *)ckernel; @@ -96,34 +132,51 @@ } // Check kernel stub is loaded from kernel handle when function pointer is -// used with triple chevron - -// CHECK-LABEL: define{{.*}}@_Z4fun4v() -// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr -// CHECK: call i32 @_Z16hipConfigureCall4dim3S_mP9hipStream -// CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8 -// CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to void ()** -// CHECK: %[[STUB:.*]] = load void ()*, void ()** %[[CAST]], align 8 -// CHECK: call void %[[STUB]]() -void fun4() { +// used with triple chevron for non-MSVC target but kernel stub is directly +// used without extra indirection for MSVC target. + +// CHECK-LABEL: define{{.*}}@fun4() +// GNU: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr +// GNU: call i32 @{{.*hipConfigureCall}} +// GNU: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8 +// GNU: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to void ()** +// GNU: %[[STUB:.*]] = load void ()*, void ()** %[[CAST]], align 8 +// GNU: call void %[[STUB]]() +// MSVC: store void ()* @[[CSTUB]], void ()** @kernel_ptr +// MSVC: call i32 @{{.*hipConfigureCall}} +// MSVC: %[[STUB:.*]] = load void ()*, void ()** @kernel_ptr, align 8 +// MSVC: call void %[[STUB]]() +extern "C" void fun4() { kernel_ptr = ckernel; kernel_ptr<<<1,1>>>(); } -// Check kernel handle is passed to a function +// Check kernel handle is passed to a function for non-MSVC target but +// kernel stub is passed for MSVC target. -// CHECK-LABEL: define{{.*}}@_Z4fun5v() -// CHECK: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr +// CHECK-LABEL: define{{.*}}@fun5() +// GNU: store void ()* bitcast (void ()** @[[HCKERN]] to void ()*), void ()** @kernel_ptr +// MSVC: store void ()* @[[CSTUB]], void ()** @kernel_ptr // CHECK: %[[HANDLE:.*]] = load void ()*, void ()** @kernel_ptr, align 8 // CHECK: %[[CAST:.*]] = bitcast void ()* %[[HANDLE]] to i8* -// CHECK: call void @_Z6launchPv(i8* %[[CAST]]) -void fun5() { +// CHECK: call void @launch(i8* %[[CAST]]) +extern "C" void fun5() { kernel_ptr = ckernel; launch((void *)kernel_ptr); } +// Check kernel handle is registered for non-MSVC target but kernel stub +// is registered for MSVC target. + // CHECK-LABEL: define{{.*}}@__hip_register_globals -// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]] -// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]] -// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]] -// CHECK-NOT: call{{.*}}@__hipRegisterFunction{{.*}}@[[HDKERN]]{{.*}}@{{[0-9]*}} +// GNU: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]] +// GNU: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]] +// GNU: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]] +// GNUNEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}@__device_stub__ckernel{{.*}}@{{[0-9]*}} +// GNUNEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}@_ZN2ns23__device_stub__nskernelEv{{.*}}@{{[0-9]*}} +// GNUNEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}@_Z25__device_stub__kernelfuncIiEvv{{.*}}@{{[0-9]*}} +// GNUNEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}@_Z26__device_stub__kernel_declv{{.*}}@{{[0-9]*}} +// MSVC: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]] +// MSVC: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]] +// MSVC: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]] +// MSVCNEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}@"\?kernel_decl@@YAXXZ"{{.*}}@{{[0-9]*}} Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -5322,10 +5322,11 @@ } // HIP function pointer contains kernel handle when it is used in triple - // chevron. The kernel stub needs to be loaded from kernel handle and used - // as callee. + // chevron for non-MSVC target. The kernel stub needs to be loaded from + // kernel handle and used as callee. if (CGM.getLangOpts().HIP && !CGM.getLangOpts().CUDAIsDevice && isa<CUDAKernelCallExpr>(E) && + !CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() && (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) { llvm::Value *Handle = Callee.getFunctionPointer(); auto *Cast = Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -1134,7 +1134,9 @@ if (Loc != KernelHandles.end()) return Loc->second; - if (!CGM.getLangOpts().HIP) { + // When HIP host target is MSVC, do not use kernel handle. + if (!CGM.getLangOpts().HIP || + CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft()) { KernelHandles[F] = F; KernelStubs[F] = F; return F; Index: clang/include/clang/AST/GlobalDecl.h =================================================================== --- clang/include/clang/AST/GlobalDecl.h +++ clang/include/clang/AST/GlobalDecl.h @@ -20,6 +20,7 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/Basic/ABI.h" #include "clang/Basic/LLVM.h" +#include "clang/Basic/TargetInfo.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/Casting.h" @@ -151,8 +152,11 @@ } static KernelReferenceKind getDefaultKernelReference(const FunctionDecl *D) { - return D->getLangOpts().CUDAIsDevice ? KernelReferenceKind::Kernel - : KernelReferenceKind::Stub; + // When Target ABI is MSVC, do not mangle kernel stub differently. + return D->getLangOpts().CUDAIsDevice || + D->getASTContext().getTargetInfo().getCXXABI().isMicrosoft() + ? KernelReferenceKind::Kernel + : KernelReferenceKind::Stub; } GlobalDecl getWithDecl(const Decl *D) {
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits