jhuber6 created this revision.
jhuber6 added a reviewer: jdoerfert.
Herald added subscribers: guansong, tpr, yaxunl, jvesely, jholewinski.
jhuber6 requested review of this revision.
Herald added subscribers: openmp-commits, cfe-commits, sstefan1.
Herald added projects: clang, OpenMP.
This patch adds support for the
`__kmpc_get_hardware_num_threads_in_block` function that returns the
number of threads. This was missing in the new runtime and was used by
the AMDGPU plugin which prevented it from using the new runtime. This
patchs also unified the interface for getting the thread numbers in the
frontend.

Originally authored by jdoerfert.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D111475

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
  openmp/libomptarget/DeviceRTL/include/Interface.h
  openmp/libomptarget/DeviceRTL/src/Mapping.cpp
  openmp/libomptarget/DeviceRTL/src/Utils.cpp

Index: openmp/libomptarget/DeviceRTL/src/Utils.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/Utils.cpp
+++ openmp/libomptarget/DeviceRTL/src/Utils.cpp
@@ -22,6 +22,7 @@
 /// Helper to keep code alive without introducing a performance penalty.
 __attribute__((used, weak, optnone)) void keepAlive() {
   __kmpc_get_hardware_thread_id_in_block();
+  __kmpc_get_hardware_num_threads_in_block();
   __kmpc_barrier_simple_spmd(nullptr, 0);
 }
 } // namespace _OMP
Index: openmp/libomptarget/DeviceRTL/src/Mapping.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -84,8 +84,9 @@
 }
 
 uint32_t getNumberOfProcessorElements() {
-  // TODO
-  return mapping::getBlockSize();
+  return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
+                         __builtin_amdgcn_grid_size_x(),
+                         __builtin_amdgcn_workgroup_size_x());
 }
 
 uint32_t getWarpId() {
@@ -230,5 +231,9 @@
 __attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
   return mapping::getThreadIdInBlock();
 }
+
+__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
+  return mapping::getNumberOfProcessorElements();
+}
 }
 #pragma omp end declare target
Index: openmp/libomptarget/DeviceRTL/include/Interface.h
===================================================================
--- openmp/libomptarget/DeviceRTL/include/Interface.h
+++ openmp/libomptarget/DeviceRTL/include/Interface.h
@@ -203,6 +203,9 @@
 /// External interface to get the thread ID.
 uint32_t __kmpc_get_hardware_thread_id_in_block();
 
+/// External interface to get the number of threads.
+uint32_t __kmpc_get_hardware_num_threads_in_block();
+
 /// Kernel
 ///
 ///{
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -32,9 +32,6 @@
 
   /// Get the id of the current thread on the GPU.
   llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override;
-
-  /// Get the maximum number of threads in a block of the GPU.
-  llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override;
 };
 
 } // CodeGen namespace.
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -46,11 +46,3 @@
       &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x);
   return Bld.CreateCall(F, llvm::None, "nvptx_tid");
 }
-
-llvm::Value *CGOpenMPRuntimeNVPTX::getGPUNumThreads(CodeGenFunction &CGF) {
-  CGBuilderTy &Bld = CGF.Builder;
-  llvm::Function *F;
-  F = llvm::Intrinsic::getDeclaration(
-      &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x);
-  return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
-}
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -182,7 +182,7 @@
   virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0;
 
   /// Get the maximum number of threads in a block of the GPU.
-  virtual llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) = 0;
+  llvm::Value *getGPUNumThreads(CodeGenFunction &CGF);
 
   /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
   /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3947,3 +3947,16 @@
   }
   CGOpenMPRuntime::clear();
 }
+
+llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {
+  CGBuilderTy &Bld = CGF.Builder;
+  llvm::Module *M = &CGF.CGM.getModule();
+  const char *LocSize = "__kmpc_get_hardware_num_threads_in_block";
+  llvm::Function *F = M->getFunction(LocSize);
+  if (!F) {
+    F = llvm::Function::Create(
+        llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false),
+        llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
+  }
+  return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
+}
Index: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
@@ -32,9 +32,6 @@
 
   /// Get the id of the current thread on the GPU.
   llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override;
-
-  /// Get the maximum number of threads in a block of the GPU.
-  llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override;
 };
 
 } // namespace CodeGen
Index: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
@@ -46,16 +46,3 @@
       CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x);
   return Bld.CreateCall(F, llvm::None, "nvptx_tid");
 }
-
-llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) {
-  CGBuilderTy &Bld = CGF.Builder;
-  llvm::Module *M = &CGF.CGM.getModule();
-  const char *LocSize = "__kmpc_amdgcn_gpu_num_threads";
-  llvm::Function *F = M->getFunction(LocSize);
-  if (!F) {
-    F = llvm::Function::Create(
-        llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false),
-        llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
-  }
-  return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
-}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to