[PATCH] D54493: [OPENMP] Check target architecture supports unified shared memory for requires directive

Patrick Lyster via Phabricator via cfe-commits Tue, 13 Nov 2018 13:22:04 -0800

patricklyster created this revision.
patricklyster added reviewers: ABataev, Hahnfeld, RaviNarayanaswamy, mikerice, 
kkwli0, hfinkel, gtbercea.
patricklyster added projects: OpenMP, clang.
Herald added subscribers: cfe-commits, guansong, jholewinski.


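The restriction on the `unified_shared_memory` clause of the OpenMP 5.0
`requires` directive states that the target architecture must support unified
addressing (>= sm_70). This patch implements that restriction by emitting an
error during NVPTX code generation when the target architecture is below sm_70.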


Repository:
  rC Clang

https://reviews.llvm.org/D54493

Files:
  clang/lib/CodeGen/CGDecl.cpp
  clang/lib/CodeGen/CGOpenMPRuntime.h
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
  clang/test/OpenMP/requires_codegen.cpp

Index: clang/test/OpenMP/requires_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/requires_codegen.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DREGION_HOST
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_20 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_21 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_30 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_32 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_35 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_37 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_50 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_52 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_53 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_60 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_62 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_70 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_72 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_75 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
+
+#if defined(REGION_HOST) || defined(REGION_DEVICE_NO_ERR)
+// expected-no-diagnostics
+#pragma omp requires unified_shared_memory
+#endif
+
+#ifdef REGION_DEVICE
+#pragma omp requires unified_shared_memory // expected-error {{Target architecture does not support unified addressing}} 
+#endif
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -356,6 +356,11 @@
   void adjustTargetSpecificDataForLambdas(
       CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
 
+  /// Perform check on requires decl to ensure that target architecture
+  /// supports unified addressing.
+  void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+                                     const OMPRequiresDecl *D) const override;
+
 private:
   /// Track the execution mode when codegening directives within a target
   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4442,6 +4442,74 @@
   }
 }
 
+// Return the first enabled target feature that names a known CudaArch.
+static CudaArch getCudaArch(CodeGenModule &CGM) {
+  const auto &TI = CGM.getTarget();
+  if (!TI.hasFeature("ptx"))
+    return CudaArch::UNKNOWN;
+  llvm::StringMap<bool> Features;
+  TI.initFeatureMap(Features, CGM.getDiags(), TI.getTargetOpts().CPU,
+                    TI.getTargetOpts().Features);
+  for (const auto &Feature : Features) {
+    if (!Feature.getValue())
+      continue;
+    const CudaArch Arch = StringToCudaArch(Feature.getKey());
+    if (Arch != CudaArch::UNKNOWN)
+      return Arch;
+  }
+  return CudaArch::UNKNOWN;
+}
+
+/// Emit an error for an OpenMP requires "unified_shared_memory" clause when
+/// the current CUDA architecture is one of SM_20 through SM_62.
+void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
+    CodeGenModule &CGM, const OMPRequiresDecl *D) const {
+  for (const OMPClause *Clause : D->clauselists()) {
+    if (Clause->getClauseKind() != OMPC_unified_shared_memory)
+      continue;
+    switch (getCudaArch(CGM)) {
+    case CudaArch::SM_20:
+    case CudaArch::SM_21:
+    case CudaArch::SM_30:
+    case CudaArch::SM_32:
+    case CudaArch::SM_35:
+    case CudaArch::SM_37:
+    case CudaArch::SM_50:
+    case CudaArch::SM_52:
+    case CudaArch::SM_53:
+    case CudaArch::SM_60:
+    case CudaArch::SM_61:
+    case CudaArch::SM_62:
+      CGM.Error(Clause->getBeginLoc(),
+                "Target architecture does not support unified addressing");
+      return;
+    case CudaArch::SM_70:
+    case CudaArch::SM_72:
+    case CudaArch::SM_75:
+    case CudaArch::GFX600:
+    case CudaArch::GFX601:
+    case CudaArch::GFX700:
+    case CudaArch::GFX701:
+    case CudaArch::GFX702:
+    case CudaArch::GFX703:
+    case CudaArch::GFX704:
+    case CudaArch::GFX801:
+    case CudaArch::GFX802:
+    case CudaArch::GFX803:
+    case CudaArch::GFX810:
+    case CudaArch::GFX900:
+    case CudaArch::GFX902:
+    case CudaArch::GFX904:
+    case CudaArch::GFX906:
+    case CudaArch::GFX909:
+    case CudaArch::UNKNOWN:
+      break;
+    case CudaArch::LAST:
+      llvm_unreachable("Unexpected Cuda arch.");
+    }
+  }
+}
+
 /// Get number of SMs and number of blocks per SM.
 static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
   std::pair<unsigned, unsigned> Data;
@@ -4451,55 +4519,45 @@
     Data.second = CGM.getLangOpts().OpenMPCUDABlocksPerSM;
   if (Data.first && Data.second)
     return Data;
-  if (CGM.getTarget().hasFeature("ptx")) {
-    llvm::StringMap<bool> Features;
-    CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
-                                   CGM.getTarget().getTargetOpts().CPU,
-                                   CGM.getTarget().getTargetOpts().Features);
-    for (const auto &Feature : Features) {
-      if (Feature.getValue()) {
-        switch (StringToCudaArch(Feature.getKey())) {
-        case CudaArch::SM_20:
-        case CudaArch::SM_21:
-        case CudaArch::SM_30:
-        case CudaArch::SM_32:
-        case CudaArch::SM_35:
-        case CudaArch::SM_37:
-        case CudaArch::SM_50:
-        case CudaArch::SM_52:
-        case CudaArch::SM_53:
-          return {16, 16};
-        case CudaArch::SM_60:
-        case CudaArch::SM_61:
-        case CudaArch::SM_62:
-          return {56, 32};
-        case CudaArch::SM_70:
-        case CudaArch::SM_72:
-        case CudaArch::SM_75:
-          return {84, 32};
-        case CudaArch::GFX600:
-        case CudaArch::GFX601:
-        case CudaArch::GFX700:
-        case CudaArch::GFX701:
-        case CudaArch::GFX702:
-        case CudaArch::GFX703:
-        case CudaArch::GFX704:
-        case CudaArch::GFX801:
-        case CudaArch::GFX802:
-        case CudaArch::GFX803:
-        case CudaArch::GFX810:
-        case CudaArch::GFX900:
-        case CudaArch::GFX902:
-        case CudaArch::GFX904:
-        case CudaArch::GFX906:
-        case CudaArch::GFX909:
-        case CudaArch::UNKNOWN:
-          break;
-        case CudaArch::LAST:
-          llvm_unreachable("Unexpected Cuda arch.");
-        }
-      }
-    }
+  switch (getCudaArch(CGM)) {
+  case CudaArch::SM_20:
+  case CudaArch::SM_21:
+  case CudaArch::SM_30:
+  case CudaArch::SM_32:
+  case CudaArch::SM_35:
+  case CudaArch::SM_37:
+  case CudaArch::SM_50:
+  case CudaArch::SM_52:
+  case CudaArch::SM_53:
+    return {16, 16};
+  case CudaArch::SM_60:
+  case CudaArch::SM_61:
+  case CudaArch::SM_62:
+    return {56, 32};
+  case CudaArch::SM_70:
+  case CudaArch::SM_72:
+  case CudaArch::SM_75:
+    return {84, 32};
+  case CudaArch::GFX600:
+  case CudaArch::GFX601:
+  case CudaArch::GFX700:
+  case CudaArch::GFX701:
+  case CudaArch::GFX702:
+  case CudaArch::GFX703:
+  case CudaArch::GFX704:
+  case CudaArch::GFX801:
+  case CudaArch::GFX802:
+  case CudaArch::GFX803:
+  case CudaArch::GFX810:
+  case CudaArch::GFX900:
+  case CudaArch::GFX902:
+  case CudaArch::GFX904:
+  case CudaArch::GFX906:
+  case CudaArch::GFX909:
+  case CudaArch::UNKNOWN:
+    break;
+  case CudaArch::LAST:
+    llvm_unreachable("Unexpected Cuda arch.");
   }
   llvm_unreachable("Unexpected NVPTX target without ptx feature.");
 }
Index: clang/lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.h
+++ clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -1549,6 +1549,11 @@
   virtual void
   adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF,
                                      const OMPExecutableDirective &D) const;
+
+  /// Perform check on requires decl to ensure that target architecture
+  /// supports unified addressing. The default implementation does nothing.
+  virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+                                             const OMPRequiresDecl *D) const {}
 };
 
 /// Class supports emissionof SIMD-only code.
Index: clang/lib/CodeGen/CGDecl.cpp
===================================================================
--- clang/lib/CodeGen/CGDecl.cpp
+++ clang/lib/CodeGen/CGDecl.cpp
@@ -2150,5 +2150,5 @@
 }
 
 void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) {
-  //Do nothing - here to avoid build errors
+  getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D);
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D54493: [OPENMP] Check target architecture supports unified shared memory for requires directive

Reply via email to