jhuber6 created this revision. jhuber6 added a reviewer: jdoerfert. Herald added subscribers: guansong, tpr, yaxunl, jvesely, jholewinski. jhuber6 requested review of this revision. Herald added subscribers: openmp-commits, cfe-commits, sstefan1. Herald added projects: clang, OpenMP.
This patch adds support for the `__kmpc_get_hardware_num_threads_in_block` function that returns the number of threads. This was missing in the new runtime and was used by the AMDGPU plugin which prevented it from using the new runtime. This patchs also unified the interface for getting the thread numbers in the frontend. Originally authored by jdoerfert. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D111475 Files: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp clang/lib/CodeGen/CGOpenMPRuntimeGPU.h clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h openmp/libomptarget/DeviceRTL/include/Interface.h openmp/libomptarget/DeviceRTL/src/Mapping.cpp openmp/libomptarget/DeviceRTL/src/Utils.cpp
Index: openmp/libomptarget/DeviceRTL/src/Utils.cpp =================================================================== --- openmp/libomptarget/DeviceRTL/src/Utils.cpp +++ openmp/libomptarget/DeviceRTL/src/Utils.cpp @@ -22,6 +22,7 @@ /// Helper to keep code alive without introducing a performance penalty. __attribute__((used, weak, optnone)) void keepAlive() { __kmpc_get_hardware_thread_id_in_block(); + __kmpc_get_hardware_num_threads_in_block(); __kmpc_barrier_simple_spmd(nullptr, 0); } } // namespace _OMP Index: openmp/libomptarget/DeviceRTL/src/Mapping.cpp =================================================================== --- openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -84,8 +84,9 @@ } uint32_t getNumberOfProcessorElements() { - // TODO - return mapping::getBlockSize(); + return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(), + __builtin_amdgcn_grid_size_x(), + __builtin_amdgcn_workgroup_size_x()); } uint32_t getWarpId() { @@ -230,5 +231,9 @@ __attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() { return mapping::getThreadIdInBlock(); } + +__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() { + return mapping::getNumberOfProcessorElements(); +} } #pragma omp end declare target Index: openmp/libomptarget/DeviceRTL/include/Interface.h =================================================================== --- openmp/libomptarget/DeviceRTL/include/Interface.h +++ openmp/libomptarget/DeviceRTL/include/Interface.h @@ -203,6 +203,9 @@ /// External interface to get the thread ID. uint32_t __kmpc_get_hardware_thread_id_in_block(); +/// External interface to get the number of threads. +uint32_t __kmpc_get_hardware_num_threads_in_block(); + /// Kernel /// ///{ Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -32,9 +32,6 @@ /// Get the id of the current thread on the GPU. llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; - - /// Get the maximum number of threads in a block of the GPU. - llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override; }; } // CodeGen namespace. Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -46,11 +46,3 @@ &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x); return Bld.CreateCall(F, llvm::None, "nvptx_tid"); } - -llvm::Value *CGOpenMPRuntimeNVPTX::getGPUNumThreads(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Function *F; - F = llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x); - return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); -} Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.h =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -182,7 +182,7 @@ virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0; /// Get the maximum number of threads in a block of the GPU. - virtual llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) = 0; + llvm::Value *getGPUNumThreads(CodeGenFunction &CGF); /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -3947,3 +3947,16 @@ } CGOpenMPRuntime::clear(); } + +llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Module *M = &CGF.CGM.getModule(); + const char *LocSize = "__kmpc_get_hardware_num_threads_in_block"; + llvm::Function *F = M->getFunction(LocSize); + if (!F) { + F = llvm::Function::Create( + llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false), + llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); + } + return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); +} Index: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h +++ clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h @@ -32,9 +32,6 @@ /// Get the id of the current thread on the GPU. llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; - - /// Get the maximum number of threads in a block of the GPU. - llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override; }; } // namespace CodeGen Index: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp @@ -46,16 +46,3 @@ CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x); return Bld.CreateCall(F, llvm::None, "nvptx_tid"); } - -llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Module *M = &CGF.CGM.getModule(); - const char *LocSize = "__kmpc_amdgcn_gpu_num_threads"; - llvm::Function *F = M->getFunction(LocSize); - if (!F) { - F = llvm::Function::Create( - llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false), - llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); - } - return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); -}
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits