yaxunl updated this revision to Diff 322213.
yaxunl retitled this revision from "[HIP] Simplify kernel launching" to "[HIP]
Emit kernel symbol".
yaxunl edited the summary of this revision.
yaxunl added a comment.
Revised by Artem's comments.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D86376/new/
https://reviews.llvm.org/D86376
Files:
clang/lib/CodeGen/CGCUDANV.cpp
clang/test/CodeGenCUDA/Inputs/cuda.h
clang/test/CodeGenCUDA/cxx-call-kernel.cpp
clang/test/CodeGenCUDA/kernel-dbg-info.cu
clang/test/CodeGenCUDA/kernel-stub-name.cu
clang/test/CodeGenCUDA/unnamed-types.cu
Index: clang/test/CodeGenCUDA/unnamed-types.cu
===================================================================
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -54,7 +54,7 @@
[] __device__ (float x) { return x + 5.f; });
}
// HOST: @__hip_register_globals
-// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
-// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
// MSVC: __hipRegisterFunction{{.*}}@"??$k0@V<lambda_1>@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV<lambda_1>@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
// MSVC: __hipRegisterFunction{{.*}}@"??$k1@V<lambda_2>@?0??f1@@YAXPEAM@Z@V<lambda_3>@?0??2@YAX0@Z@V<lambda_4>@?0??2@YAX0@Z@@@YAXPEAMV<lambda_2>@?0??f1@@YAX0@Z@V<lambda_3>@?0??1@YAX0@Z@V<lambda_4>@?0??1@YAX0@Z@@Z{{.*}}@1
Index: clang/test/CodeGenCUDA/kernel-stub-name.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -6,6 +6,12 @@
#include "Inputs/cuda.h"
+// Kernel handles
+
+// CHECK: @[[HCKERN:ckernel]] = constant i8* null
+// CHECK: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant i8* null
+// CHECK: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant i8* null
+
extern "C" __global__ void ckernel() {}
namespace ns {
@@ -26,9 +32,9 @@
// Non-template kernel stub functions
// CHECK: define{{.*}}@[[CSTUB:__device_stub__ckernel]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HCKERN]]
// CHECK: define{{.*}}@[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[NSSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HNSKERN]]
// CHECK-LABEL: define{{.*}}@_Z8hostfuncv()
// CHECK: call void @[[CSTUB]]()
@@ -45,11 +51,11 @@
// Template kernel stub functions
// CHECK: define{{.*}}@[[TSTUB]]
-// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[TSTUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[HTKERN]]
// CHECK: declare{{.*}}@[[DSTUB]]
// CHECK-LABEL: define{{.*}}@__hip_register_globals
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[CSTUB]]{{.*}}@[[CKERN]]
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[NSSTUB]]{{.*}}@[[NSKERN]]
-// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[TSTUB]]{{.*}}@[[TKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HCKERN]]{{.*}}@[[CKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HNSKERN]]{{.*}}@[[NSKERN]]
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
Index: clang/test/CodeGenCUDA/kernel-dbg-info.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-dbg-info.cu
+++ clang/test/CodeGenCUDA/kernel-dbg-info.cu
@@ -30,6 +30,9 @@
*a = 1;
}
+// Kernel symbol for launching kernel.
+// CHECK: @[[SYM:ckernel]] = constant i8* null
+
// Device side kernel names
// CHECK: @[[CKERN:[0-9]*]] = {{.*}} c"ckernel\00"
@@ -40,7 +43,7 @@
// Make sure there is no !dbg between function attributes and '{'
// CHECK: define{{.*}} void @[[CSTUB:__device_stub__ckernel]]{{.*}} #{{[0-9]+}} {
// CHECK-NOT: call {{.*}}@hipLaunchByPtr{{.*}}!dbg
-// CHECK: call {{.*}}@hipLaunchByPtr{{.*}}@[[CSTUB]]
+// CHECK: call {{.*}}@hipLaunchByPtr{{.*}}@[[SYM]]
// CHECK-NOT: ret {{.*}}!dbg
// CHECK-LABEL: define {{.*}}@_Z8hostfuncPi{{.*}}!dbg
Index: clang/test/CodeGenCUDA/cxx-call-kernel.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/cxx-call-kernel.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -x hip -emit-llvm-bc %s -o %t.hip.bc
+// RUN: %clang_cc1 -mlink-bitcode-file %t.hip.bc -DHIP_PLATFORM -emit-llvm \
+// RUN: %s -o - | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: @_Z2g1i = constant i8* null
+#if __HIP__
+__global__ void g1(int x) {}
+#else
+extern void g1(int x);
+
+// CHECK: call i32 @hipLaunchKernel{{.*}}@_Z2g1i
+void test() {
+ hipLaunchKernel((void*)g1, 1, 1, nullptr, 0, 0);
+}
+
+// CHECK: __hipRegisterFunction{{.*}}@_Z2g1i
+#endif
Index: clang/test/CodeGenCUDA/Inputs/cuda.h
===================================================================
--- clang/test/CodeGenCUDA/Inputs/cuda.h
+++ clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -2,6 +2,7 @@
#include <stddef.h>
+#if __HIP__ || __CUDA__
#define __constant__ __attribute__((constant))
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
@@ -11,13 +12,22 @@
#define __managed__ __attribute__((managed))
#endif
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+#else
+#define __constant__
+#define __device__
+#define __global__
+#define __host__
+#define __shared__
+#define __managed__
+#define __launch_bounds__(...)
+#endif
struct dim3 {
unsigned x, y, z;
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
};
-#ifdef __HIP__
+#if __HIP__ || HIP_PLATFORM
typedef struct hipStream *hipStream_t;
typedef enum hipError {} hipError_t;
int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -42,12 +42,16 @@
llvm::LLVMContext &Context;
/// Convenience reference to the current module
llvm::Module &TheModule;
- /// Keeps track of kernel launch stubs emitted in this module
+ /// Keeps track of kernel launch stubs and handles emitted in this module
struct KernelInfo {
- llvm::Function *Kernel;
+ llvm::Function *Kernel; // stub function to help launch kernel
const Decl *D;
};
llvm::SmallVector<KernelInfo, 16> EmittedKernels;
+ // Map a device stub function to a symbol for identifying kernel in host code.
+ // For CUDA, the symbol for identifying the kernel is the same as the device
+ // stub function. For HIP, they are different.
+ llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles;
struct VarInfo {
llvm::GlobalVariable *Var;
const VarDecl *D;
@@ -270,6 +274,18 @@
void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
FunctionArgList &Args) {
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
+ llvm::GlobalValue *KernelHandle = CGF.CurFn;
+ if (CGF.getLangOpts().HIP) {
+ auto Linkage = CGF.CurFn->getLinkage();
+ auto *Var = new llvm::GlobalVariable(
+ TheModule, VoidPtrTy, /*isConstant=*/true, Linkage,
+ /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrTy),
+ CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl),
+ KernelReferenceKind::Kernel)));
+ Var->setAlignment(CGM.getPointerAlign().getAsAlign());
+ KernelHandle = Var;
+ }
+ KernelHandles[CGF.CurFn] = KernelHandle;
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
CudaFeature::CUDA_USES_NEW_LAUNCH) ||
(CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI))
@@ -348,7 +364,8 @@
ShmemSize.getPointer(), Stream.getPointer()});
// Emit the call to cudaLaunch
- llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy);
+ llvm::Value *Kernel =
+ CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy);
CallArgList LaunchKernelArgs;
LaunchKernelArgs.add(RValue::get(Kernel),
cudaLaunchKernelFD->getParamDecl(0)->getType());
@@ -403,7 +420,8 @@
// Emit the call to cudaLaunch
llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
- llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
+ llvm::Value *Arg =
+ CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy);
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
CGF.EmitBranch(EndBlock);
@@ -497,7 +515,7 @@
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
llvm::Value *Args[] = {
&GpuBinaryHandlePtr,
- Builder.CreateBitCast(I.Kernel, VoidPtrTy),
+ Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy),
KernelName,
KernelName,
llvm::ConstantInt::get(IntTy, -1),
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits