This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG9d899d8f0187: [HIP] Support `-fgpu-default-stream` (authored 
by yaxunl).
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120298/new/

https://reviews.llvm.org/D120298

Files:
  clang/include/clang/Basic/LangOptions.h
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/InitPreprocessor.cpp
  clang/test/CodeGenCUDA/Inputs/cuda.h
  clang/test/CodeGenCUDA/kernel-call.cu
  clang/test/Driver/hip-options.hip
  clang/test/Preprocessor/predefined-macros.c

Index: clang/test/Preprocessor/predefined-macros.c
===================================================================
--- clang/test/Preprocessor/predefined-macros.c
+++ clang/test/Preprocessor/predefined-macros.c
@@ -247,6 +247,7 @@
 // CHECK-HIP-NEG-NOT: #define __CUDA_ARCH__
 // CHECK-HIP-NEG-NOT: #define __HIP_DEVICE_COMPILE__ 1
 // CHECK-HIP-NEG-NOT: #define __CLANG_RDC__ 1
+// CHECK-HIP-NEG-NOT: #define HIP_API_PER_THREAD_DEFAULT_STREAM
 
 // RUN: %clang_cc1 %s -E -dM -o - -x hip -triple amdgcn-amd-amdhsa \
 // RUN:   -fcuda-is-device \
@@ -265,6 +266,7 @@
 // RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-HIP-DEV-NEG
 // CHECK-HIP-DEV-NEG-NOT: #define __CUDA_ARCH__
 // CHECK-HIP-DEV-NEG-NOT: #define __CLANG_RDC__ 1
+// CHECK-HIP-DEV-NEG-NOT: #define HIP_API_PER_THREAD_DEFAULT_STREAM
 
 // RUN: %clang_cc1 %s -E -dM -o - -x cuda -triple x86_64-unknown-linux-gnu \
 // RUN:   -fgpu-rdc | FileCheck %s --check-prefix=CHECK-RDC
@@ -277,3 +279,11 @@
 // RUN:   -fgpu-rdc -fcuda-is-device \
 // RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-RDC
 // CHECK-RDC: #define __CLANG_RDC__ 1
+
+// RUN: %clang_cc1 %s -E -dM -o - -x hip -triple x86_64-unknown-linux-gnu \
+// RUN:   -fgpu-default-stream=per-thread \
+// RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-PTH
+// RUN: %clang_cc1 %s -E -dM -o - -x hip -triple amdgcn-amd-amdhsa \
+// RUN:   -fcuda-is-device -fgpu-default-stream=per-thread \
+// RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-PTH
+// CHECK-PTH: #define HIP_API_PER_THREAD_DEFAULT_STREAM 1
Index: clang/test/Driver/hip-options.hip
===================================================================
--- clang/test/Driver/hip-options.hip
+++ clang/test/Driver/hip-options.hip
@@ -14,6 +14,14 @@
 // DEVINIT: clang{{.*}}" "-cc1" {{.*}}"-fgpu-allow-device-init"
 // DEVINIT: clang{{.*}}" "-cc1" {{.*}}"-fgpu-allow-device-init"
 
+// Check -fgpu-default-stream=per-thread.
+// RUN: %clang -### -nogpuinc -nogpulib -fgpu-default-stream=per-thread \
+// RUN:   %s -save-temps 2>&1 | FileCheck -check-prefix=PTH %s
+// PTH: clang{{.*}}" "-cc1" {{.*}}"-E" {{.*}}"-fgpu-default-stream=per-thread"
+// PTH: clang{{.*}}" "-cc1" {{.*}}"-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
+// PTH: clang{{.*}}" "-cc1" {{.*}}"-E" {{.*}}"-fgpu-default-stream=per-thread"
+// PTH: clang{{.*}}" "-cc1" {{.*}}"-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
+
 // RUN: %clang -### -x hip -target x86_64-pc-windows-msvc -fms-extensions \
 // RUN:   -mllvm -amdgpu-early-inline-all=true  %s 2>&1 | \
 // RUN:   FileCheck -check-prefix=MLLVM %s
Index: clang/test/CodeGenCUDA/kernel-call.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-call.cu
+++ clang/test/CodeGenCUDA/kernel-call.cu
@@ -5,7 +5,13 @@
 // RUN: %clang_cc1 -x hip -emit-llvm %s -o - \
 // RUN: | FileCheck %s --check-prefixes=HIP-OLD,CHECK
 // RUN: %clang_cc1 -fhip-new-launch-api -x hip -emit-llvm %s -o - \
-// RUN: | FileCheck %s --check-prefixes=HIP-NEW,CHECK
+// RUN: | FileCheck %s --check-prefixes=HIP-NEW,LEGACY,CHECK
+// RUN: %clang_cc1 -fhip-new-launch-api -x hip -emit-llvm %s -o - \
+// RUN:   -fgpu-default-stream=legacy \
+// RUN:   | FileCheck %s --check-prefixes=HIP-NEW,LEGACY,CHECK
+// RUN: %clang_cc1 -fhip-new-launch-api -x hip -emit-llvm %s -o - \
+// RUN:   -fgpu-default-stream=per-thread -DHIP_API_PER_THREAD_DEFAULT_STREAM \
+// RUN:   | FileCheck %s --check-prefixes=HIP-NEW,PTH,CHECK
 
 #include "Inputs/cuda.h"
 
@@ -13,7 +19,8 @@
 // HIP-OLD: call{{.*}}hipSetupArgument
 // HIP-OLD: call{{.*}}hipLaunchByPtr
 // HIP-NEW: call{{.*}}__hipPopCallConfiguration
-// HIP-NEW: call{{.*}}hipLaunchKernel
+// LEGACY: call{{.*}}hipLaunchKernel
+// PTH: call{{.*}}hipLaunchKernel_spt
 // CUDA-OLD: call{{.*}}cudaSetupArgument
 // CUDA-OLD: call{{.*}}cudaLaunch
 // CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
Index: clang/test/CodeGenCUDA/Inputs/cuda.h
===================================================================
--- clang/test/CodeGenCUDA/Inputs/cuda.h
+++ clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -35,11 +35,18 @@
 extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
                                                  size_t sharedSize = 0,
                                                  hipStream_t stream = 0);
+#ifndef HIP_API_PER_THREAD_DEFAULT_STREAM
 extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
                                       dim3 blockDim, void **args,
                                       size_t sharedMem,
                                       hipStream_t stream);
 #else
+extern "C" hipError_t hipLaunchKernel_spt(const void *func, dim3 gridDim,
+                                      dim3 blockDim, void **args,
+                                      size_t sharedMem,
+                                      hipStream_t stream);
+#endif //HIP_API_PER_THREAD_DEFAULT_STREAM
+#else
 typedef struct cudaStream *cudaStream_t;
 typedef enum cudaError {} cudaError_t;
 extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
Index: clang/lib/Frontend/InitPreprocessor.cpp
===================================================================
--- clang/lib/Frontend/InitPreprocessor.cpp
+++ clang/lib/Frontend/InitPreprocessor.cpp
@@ -538,6 +538,9 @@
     Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
     if (LangOpts.CUDAIsDevice)
       Builder.defineMacro("__HIP_DEVICE_COMPILE__");
+    if (LangOpts.GPUDefaultStream ==
+        LangOptions::GPUDefaultStreamKind::PerThread)
+      Builder.defineMacro("HIP_API_PER_THREAD_DEFAULT_STREAM");
   }
 }
 
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6915,8 +6915,10 @@
       CmdArgs.push_back(Args.MakeArgString(Twine("-cuid=") + Twine(CUID)));
   }
 
-  if (IsHIP)
+  if (IsHIP) {
     CmdArgs.push_back("-fcuda-allow-variadic-functions");
+    Args.AddLastArg(CmdArgs, options::OPT_fgpu_default_stream_EQ);
+  }
 
   if (IsCudaDevice || IsHIPDevice) {
     StringRef InlineThresh =
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -332,15 +332,22 @@
   llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
 
   // Lookup cudaLaunchKernel/hipLaunchKernel function.
+  // HIP kernel launching API name depends on -fgpu-default-stream option. For
+  // the default value 'legacy', it is hipLaunchKernel. For 'per-thread',
+  // it is hipLaunchKernel_spt.
   // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
   //                              void **args, size_t sharedMem,
   //                              cudaStream_t stream);
-  // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
-  //                            void **args, size_t sharedMem,
-  //                            hipStream_t stream);
+  // hipError_t hipLaunchKernel[_spt](const void *func, dim3 gridDim,
+  //                                  dim3 blockDim, void **args,
+  //                                  size_t sharedMem, hipStream_t stream);
   TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
   DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
-  auto LaunchKernelName = addPrefixToName("LaunchKernel");
+  std::string KernelLaunchAPI = "LaunchKernel";
+  if (CGF.getLangOpts().HIP && CGF.getLangOpts().GPUDefaultStream ==
+                                   LangOptions::GPUDefaultStreamKind::PerThread)
+    KernelLaunchAPI = KernelLaunchAPI + "_spt";
+  auto LaunchKernelName = addPrefixToName(KernelLaunchAPI);
   IdentifierInfo &cudaLaunchKernelII =
       CGM.getContext().Idents.get(LaunchKernelName);
   FunctionDecl *cudaLaunchKernelFD = nullptr;
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -959,6 +959,13 @@
   TargetOpts<"NVPTXUseShortPointers">, DefaultFalse,
   PosFlag<SetTrue, [CC1Option], "Use 32-bit pointers for accessing const/local/shared address spaces">,
   NegFlag<SetFalse>>;
+def fgpu_default_stream_EQ : Joined<["-"], "fgpu-default-stream=">,
+  HelpText<"Specify default stream. Valid values are 'legacy' and 'per-thread'. The default value is 'legacy'. (HIP only)">,
+  Flags<[CC1Option]>,
+  Values<"legacy,per-thread">,
+  NormalizedValuesScope<"LangOptions::GPUDefaultStreamKind">,
+  NormalizedValues<["Legacy", "PerThread"]>,
+  MarshallingInfoEnum<LangOpts<"GPUDefaultStream">, "Legacy">;
 def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<i_Group>,
   HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group<i_Group>,
Index: clang/include/clang/Basic/LangOptions.h
===================================================================
--- clang/include/clang/Basic/LangOptions.h
+++ clang/include/clang/Basic/LangOptions.h
@@ -309,6 +309,13 @@
     ExtendTo64
   };
 
+  enum class GPUDefaultStreamKind {
+    /// Legacy default stream
+    Legacy,
+    /// Per-thread default stream
+    PerThread,
+  };
+
 public:
   /// The used language standard.
   LangStandard::Kind LangStd;
@@ -402,6 +409,9 @@
   /// input is a header file (i.e. -x c-header).
   bool IsHeaderFile = false;
 
+  /// The default stream kind used for HIP kernel launching.
+  GPUDefaultStreamKind GPUDefaultStream;
+
   LangOptions();
 
   // Define accessors/mutators for language options of enumeration type.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to