yaxunl created this revision.
yaxunl added a reviewer: tra.

https://reviews.llvm.org/D67947

Files:
  include/clang/Basic/LangOptions.def
  include/clang/Driver/Options.td
  lib/CodeGen/CGCUDANV.cpp
  lib/Driver/ToolChains/Clang.cpp
  lib/Frontend/CompilerInvocation.cpp
  lib/Sema/SemaCUDA.cpp
  test/CodeGenCUDA/Inputs/cuda.h
  test/CodeGenCUDA/kernel-call.cu

Index: test/CodeGenCUDA/kernel-call.cu
===================================================================
--- test/CodeGenCUDA/kernel-call.cu
+++ test/CodeGenCUDA/kernel-call.cu
@@ -3,14 +3,17 @@
 // RUN: %clang_cc1 -target-sdk-version=9.2  -emit-llvm %s -o - \
 // RUN: | FileCheck %s --check-prefixes=CUDA-NEW,CHECK
 // RUN: %clang_cc1 -x hip -emit-llvm %s -o - \
-// RUN: | FileCheck %s --check-prefixes=HIP,CHECK
-
+// RUN: | FileCheck %s --check-prefixes=HIP-OLD,CHECK
+// RUN: %clang_cc1 -fhip-new-launch-api -x hip -emit-llvm %s -o - \
+// RUN: | FileCheck %s --check-prefixes=HIP-NEW,CHECK
 
 #include "Inputs/cuda.h"
 
 // CHECK-LABEL: define{{.*}}g1
-// HIP: call{{.*}}hipSetupArgument
-// HIP: call{{.*}}hipLaunchByPtr
+// HIP-OLD: call{{.*}}hipSetupArgument
+// HIP-OLD: call{{.*}}hipLaunchByPtr
+// HIP-NEW: call{{.*}}__hipPopCallConfiguration
+// HIP-NEW: call{{.*}}hipLaunchKernelByPtr
 // CUDA-OLD: call{{.*}}cudaSetupArgument
 // CUDA-OLD: call{{.*}}cudaLaunch
 // CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
@@ -19,7 +22,8 @@
 
 // CHECK-LABEL: define{{.*}}main
 int main(void) {
-  // HIP: call{{.*}}hipConfigureCall
+  // HIP-OLD: call{{.*}}hipConfigureCall
+  // HIP-NEW: call{{.*}}__hipPushCallConfiguration
   // CUDA-OLD: call{{.*}}cudaConfigureCall
   // CUDA-NEW: call{{.*}}__cudaPushCallConfiguration
   // CHECK: icmp
Index: test/CodeGenCUDA/Inputs/cuda.h
===================================================================
--- test/CodeGenCUDA/Inputs/cuda.h
+++ test/CodeGenCUDA/Inputs/cuda.h
@@ -14,12 +14,21 @@
   __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
 };
 
-typedef struct cudaStream *cudaStream_t;
-typedef enum cudaError {} cudaError_t;
 #ifdef __HIP__
+typedef struct hipStream *hipStream_t;
+typedef enum hipError {} hipError_t;
 int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
-                     cudaStream_t stream = 0);
+                     hipStream_t stream = 0);
+extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+                                                 size_t sharedSize = 0,
+                                                 hipStream_t stream = 0);
+extern "C" hipError_t hipLaunchKernelByPtr(const void *func, dim3 gridDim,
+                                           dim3 blockDim, void **args,
+                                           size_t sharedMem,
+                                           hipStream_t stream);
 #else
+typedef struct cudaStream *cudaStream_t;
+typedef enum cudaError {} cudaError_t;
 extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
                                  size_t sharedSize = 0,
                                  cudaStream_t stream = 0);
Index: lib/Sema/SemaCUDA.cpp
===================================================================
--- lib/Sema/SemaCUDA.cpp
+++ lib/Sema/SemaCUDA.cpp
@@ -821,7 +821,8 @@
 
 std::string Sema::getCudaConfigureFuncName() const {
   if (getLangOpts().HIP)
-    return "hipConfigureCall";
+    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
+                                            : "hipConfigureCall";
 
   // New CUDA kernel launch sequence.
   if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -2514,6 +2514,7 @@
     Opts.CUDADeviceApproxTranscendentals = 1;
 
   Opts.GPURelocatableDeviceCode = Args.hasArg(OPT_fgpu_rdc);
+  Opts.HIPUseNewLaunchAPI = Args.hasArg(OPT_fhip_new_launch_api);
 
   if (Opts.ObjC) {
     if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -4774,6 +4774,10 @@
   // Forward -cl options to -cc1
   RenderOpenCLOptions(Args, CmdArgs);
 
+  if (Args.hasFlag(options::OPT_fhip_new_launch_api,
+                   options::OPT_fno_hip_new_launch_api, false))
+    CmdArgs.push_back("-fhip-new-launch-api");
+
   if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) {
     CmdArgs.push_back(
         Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
Index: lib/CodeGen/CGCUDANV.cpp
===================================================================
--- lib/CodeGen/CGCUDANV.cpp
+++ lib/CodeGen/CGCUDANV.cpp
@@ -240,7 +240,8 @@
 
   EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
-                         CudaFeature::CUDA_USES_NEW_LAUNCH))
+                         CudaFeature::CUDA_USES_NEW_LAUNCH) ||
+      CGF.getLangOpts().HIPUseNewLaunchAPI)
     emitDeviceStubBodyNew(CGF, Args);
   else
     emitDeviceStubBodyLegacy(CGF, Args);
@@ -268,14 +269,19 @@
 
   llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
 
-  // Lookup cudaLaunchKernel function.
+  // Lookup cudaLaunchKernel/hipLaunchKernelByPtr function.
   // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
   //                              void **args, size_t sharedMem,
   //                              cudaStream_t stream);
+  // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+  //                            void **args, size_t sharedMem,
+  //                            hipStream_t stream);
   TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
   DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+  StringRef LaunchKernelName =
+      CGF.getLangOpts().HIP ? "hipLaunchKernelByPtr" : "cudaLaunchKernel";
   IdentifierInfo &cudaLaunchKernelII =
-      CGM.getContext().Idents.get("cudaLaunchKernel");
+      CGM.getContext().Idents.get(LaunchKernelName);
   FunctionDecl *cudaLaunchKernelFD = nullptr;
   for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) {
     if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
@@ -283,8 +289,9 @@
   }
 
   if (cudaLaunchKernelFD == nullptr) {
-    CGM.Error(CGF.CurFuncDecl->getLocation(),
-              "Can't find declaration for cudaLaunchKernel()");
+    CGM.Error(
+        CGF.CurFuncDecl->getLocation(),
+        (llvm::Twine("Can't find declaration for ") + LaunchKernelName).str());
     return;
   }
   // Create temporary dim3 grid_dim, block_dim.
@@ -305,7 +312,7 @@
                                /*ShmemSize=*/ShmemSize.getType(),
                                /*Stream=*/Stream.getType()},
                               /*isVarArg=*/false),
-      "__cudaPopCallConfiguration");
+      addUnderscoredPrefixToName("PopCallConfiguration"));
 
   CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
                               {GridDim.getPointer(), BlockDim.getPointer(),
@@ -333,7 +340,7 @@
   const CGFunctionInfo &FI =
       CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
   llvm::FunctionCallee cudaLaunchKernelFn =
-      CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel");
+      CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
   CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
                LaunchKernelArgs);
   CGF.EmitBranch(EndBlock);
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -599,6 +599,9 @@
   HelpText<"HIP device library">;
 def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">,
   Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>;
+def fhip_new_launch_api : Flag<["-"], "fhip-new-launch-api">,
+  Flags<[CC1Option]>, HelpText<"Use new kernel launching API for HIP.">;
+def fno_hip_new_launch_api : Flag<["-"], "fno-hip-new-launch-api">;
 def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group<i_Group>,
   HelpText<"Path to libomptarget-nvptx libraries">;
 def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
Index: include/clang/Basic/LangOptions.def
===================================================================
--- include/clang/Basic/LangOptions.def
+++ include/clang/Basic/LangOptions.def
@@ -227,6 +227,8 @@
 
 LANGOPT(SYCLIsDevice      , 1, 0, "Generate code for SYCL device")
 
+LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
+
 LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
 LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
 LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are unavailable")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to