[PATCH] D108339: [openmp][nfc] Replace OMPGridValues array with struct

Jon Chesterfield via Phabricator via cfe-commits Thu, 19 Aug 2021 05:26:05 -0700

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG77579b99e9ce: [openmp][nfc] Replace OMPGridValues array with 
struct (authored by JonChesterfield).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108339/new/

https://reviews.llvm.org/D108339

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
  llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
  openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Index: openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -501,14 +501,11 @@
   static const unsigned HardTeamLimit =
       (1 << 16) - 1; // 64K needed to fit in uint16
   static const int DefaultNumTeams = 128;
-  static const int Max_Teams =
-      llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Max_Teams];
-  static const int Warp_Size =
-      llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size];
-  static const int Max_WG_Size =
-      llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Max_WG_Size];
+  static const int Max_Teams = llvm::omp::AMDGPUGridValues.GV_Max_Teams;
+  static const int Warp_Size = llvm::omp::AMDGPUGridValues.GV_Warp_Size;
+  static const int Max_WG_Size = llvm::omp::AMDGPUGridValues.GV_Max_WG_Size;
   static const int Default_WG_Size =
-      llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Default_WG_Size];
+      llvm::omp::AMDGPUGridValues.GV_Default_WG_Size;
 
   using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, const void *,
                                       size_t size, hsa_agent_t);
@@ -1058,9 +1055,8 @@
     DeviceInfo.WarpSize[device_id] = wavefront_size;
   } else {
     DP("Default wavefront size: %d\n",
-       llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size]);
-    DeviceInfo.WarpSize[device_id] =
-        llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size];
+       llvm::omp::AMDGPUGridValues.GV_Warp_Size);
+    DeviceInfo.WarpSize[device_id] = llvm::omp::AMDGPUGridValues.GV_Warp_Size;
   }
 
   // Adjust teams to the env variables
Index: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -29,68 +29,69 @@
 ///
 /// Example usage in clang:
 ///   const unsigned slot_size =
-///   ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
+///   ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
 ///
 /// Example usage in libomptarget/deviceRTLs:
 ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 ///   #ifdef __AMDGPU__
-///     #define GRIDVAL AMDGPUGpuGridValues
+///     #define GRIDVAL AMDGPUGridValues
 ///   #else
-///     #define GRIDVAL NVPTXGpuGridValues
+///     #define GRIDVAL NVPTXGridValues
 ///   #endif
 ///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
-///   llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+///   llvm::omp::GRIDVAL().GV_Warp_Size
 ///
 /// Example usage in libomptarget hsa plugin:
 ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-///   #define GRIDVAL AMDGPUGpuGridValues
+///   #define GRIDVAL AMDGPUGridValues
 ///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
-///   llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+///   llvm::omp::GRIDVAL().GV_Warp_Size
 ///
 /// Example usage in libomptarget cuda plugin:
 ///    #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-///    #define GRIDVAL NVPTXGpuGridValues
+///    #define GRIDVAL NVPTXGridValues
 ///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
-///    llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+///    llvm::omp::GRIDVAL().GV_Warp_Size
 ///
-enum GVIDX {
+
+struct GV {
   /// The maximum number of workers in a kernel.
   /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
-  GV_Threads,
+  const unsigned GV_Threads;
   /// The size reserved for data in a shared memory slot.
-  GV_Slot_Size,
+  const unsigned GV_Slot_Size;
   /// The default value of maximum number of threads in a worker warp.
-  GV_Warp_Size,
+  const unsigned GV_Warp_Size;
   /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
   /// for NVPTX.
-  GV_Warp_Size_32,
+  const unsigned GV_Warp_Size_32;
   /// The number of bits required to represent the max number of threads in warp
-  GV_Warp_Size_Log2,
+  const unsigned GV_Warp_Size_Log2;
   /// GV_Warp_Size * GV_Slot_Size,
-  GV_Warp_Slot_Size,
+  const unsigned GV_Warp_Slot_Size;
   /// the maximum number of teams.
-  GV_Max_Teams,
+  const unsigned GV_Max_Teams;
   /// Global Memory Alignment
-  GV_Mem_Align,
+  const unsigned GV_Mem_Align;
   /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  GV_Warp_Size_Log2_Mask,
+  const unsigned GV_Warp_Size_Log2_Mask;
   // An alternative to the heavy data sharing infrastructure that uses global
   // memory is one that uses device __shared__ memory.  The amount of such space
   // (in bytes) reserved by the OpenMP runtime is noted here.
-  GV_SimpleBufferSize,
+  const unsigned GV_SimpleBufferSize;
   // The absolute maximum team size for a working group
-  GV_Max_WG_Size,
+  const unsigned GV_Max_WG_Size;
   // The default maximum team size for a working group
-  GV_Default_WG_Size,
+  const unsigned GV_Default_WG_Size;
   // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
-  GV_Max_Warp_Number,
+  const unsigned GV_Max_Warp_Number;
   /// The slot size that should be reserved for a working warp.
   /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  GV_Warp_Size_Log2_MaskL
+  const unsigned GV_Warp_Size_Log2_MaskL;
 };
 
 /// For AMDGPU GPUs
-static constexpr unsigned AMDGPUGpuGridValues[] = {
+static constexpr GV AMDGPUGridValues = {
     448,       // GV_Threads
     256,       // GV_Slot_Size
     64,        // GV_Warp_Size
@@ -108,7 +109,7 @@
 };
 
 /// For Nvidia GPUs
-static constexpr unsigned NVPTXGpuGridValues[] = {
+static constexpr GV NVPTXGridValues = {
     992,               // GV_Threads
     256,               // GV_Slot_Size
     32,                // GV_Warp_Size
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -17,7 +17,6 @@
 #include "CGOpenMPRuntime.h"
 #include "CodeGenFunction.h"
 #include "clang/AST/StmtOpenMP.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
 namespace clang {
 namespace CodeGen {
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -339,7 +339,7 @@
     assert(!GlobalizedRD &&
            "Record for globalized variables is built already.");
     ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;
-    unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+    unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
     if (IsInTTDRegion)
       EscapedDeclsForTeams = EscapedDecls.getArrayRef();
     else
@@ -535,8 +535,7 @@
 /// on the NVPTX device, to generate more efficient code.
 static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
-  unsigned LaneIDBits =
-      CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2);
+  unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
   auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
   return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
 }
@@ -546,8 +545,8 @@
 /// on the NVPTX device, to generate more efficient code.
 static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
-  unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue(
-      llvm::omp::GV_Warp_Size_Log2_Mask);
+  unsigned LaneIDMask =
+      CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
   auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
   return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
                        "nvptx_lane_id");
@@ -1308,7 +1307,7 @@
   const RecordDecl *GlobalizedRD = nullptr;
   llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
   llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
-  unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+  unsigned WarpSize = CGM.getTarget().getGridValue().GV_Warp_Size;
   // Globalize team reductions variable unconditionally in all modes.
   if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
     getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
@@ -2089,7 +2088,7 @@
       "__openmp_nvptx_data_transfer_temporary_storage";
   llvm::GlobalVariable *TransferMedium =
       M.getGlobalVariable(TransferMediumName);
-  unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+  unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
   if (!TransferMedium) {
     auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
     unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
Index: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
@@ -20,6 +20,7 @@
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/Cuda.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 
 using namespace clang;
@@ -35,7 +36,7 @@
 llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
   // return constant compile-time target-specific warp size
-  unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+  unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
   return Bld.getInt32(WarpSize);
 }
 
Index: clang/lib/Basic/Targets/NVPTX.cpp
===================================================================
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -65,7 +65,7 @@
   TLSSupported = false;
   VLASupported = false;
   AddrSpaceMap = &NVPTXAddrSpaceMap;
-  GridValues = llvm::omp::NVPTXGpuGridValues;
+  GridValues = &llvm::omp::NVPTXGridValues;
   UseAddrSpaceMapMangling = true;
 
   // Define available target features
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -335,7 +335,7 @@
                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                         : DataLayoutStringR600);
-  GridValues = llvm::omp::AMDGPUGpuGridValues;
+  GridValues = &llvm::omp::AMDGPUGridValues;
 
   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                      !isAMDGCN(Triple));
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -210,8 +210,8 @@
   unsigned char RegParmMax, SSERegParmMax;
   TargetCXXABI TheCXXABI;
   const LangASMap *AddrSpaceMap;
-  const unsigned *GridValues =
-      nullptr; // Array of target-specific GPU grid values that must be
+  const llvm::omp::GV *GridValues =
+      nullptr; // target-specific GPU grid values that must be
                // consistent between host RTL (plugin), device RTL, and clang.
 
   mutable StringRef PlatformName;
@@ -1410,10 +1410,10 @@
     return LangAS::Default;
   }
 
-  /// Return a target-specific GPU grid value based on the GVIDX enum \p gv
-  unsigned getGridValue(llvm::omp::GVIDX gv) const {
+  /// Return a target-specific GPU grid values
+  const llvm::omp::GV &getGridValue() const {
     assert(GridValues != nullptr && "GridValues not initialized");
-    return GridValues[gv];
+    return *GridValues;
   }
 
   /// Retrieve the name of the platform as it is used in the

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D108339: [openmp][nfc] Replace OMPGridValues array with struct

Reply via email to