[PATCH] D145579: [Flang][AMDGPU][OpenMP] Save target features in OpenMP MLIR dialect

Dominik Adamski via Phabricator via cfe-commits Thu, 09 Mar 2023 01:02:24 -0800

domada updated this revision to Diff 503672.
domada added a comment.

Fixed formatting



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145579/new/

https://reviews.llvm.org/D145579

Files:
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/Flang.cpp
  flang/include/flang/Frontend/FrontendActions.h
  flang/lib/Frontend/FrontendActions.cpp
  flang/test/Driver/target-cpu-features.f90
  flang/test/Lower/OpenMP/target_cpu_features.f90
  llvm/include/llvm/TargetParser/TargetParser.h
  llvm/lib/TargetParser/TargetParser.cpp
  mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
  mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp

Index: mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
===================================================================
--- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1437,6 +1437,39 @@
   return false;
 }
 
+// Set the omp.target_cpu attribute on the module with the specified string
+void OpenMPDialect::setTargetCpu(Operation *module, llvm::StringRef cpu) {
+  module->setAttr(mlir::StringAttr::get(module->getContext(),
+                                        llvm::Twine{"omp.target_cpu"}),
+                  mlir::StringAttr::get(module->getContext(), cpu));
+}
+
+// Return the value of the omp.target_cpu attribute stored in the module if it
+// exists, otherwise return empty by default
+std::string OpenMPDialect::getTargetCpu(Operation *module) {
+  if (Attribute targetCpu = module->getAttr("omp.target_cpu"))
+    if (targetCpu.isa<mlir::StringAttr>())
+      return targetCpu.dyn_cast<StringAttr>().getValue().str();
+  return llvm::Twine{""}.str();
+}
+
+// Set the omp.target_cpu_features attribute on the module with
+// the specified string
+void OpenMPDialect::setTargetCpuFeatures(Operation *module,
+                                         llvm::StringRef cpuFeatures) {
+  module->setAttr(mlir::StringAttr::get(module->getContext(),
+                                        llvm::Twine{"omp.target_cpu_features"}),
+                  mlir::StringAttr::get(module->getContext(), cpuFeatures));
+}
+
+// Return the value of the omp.target_cpu_features attribute stored in the
+// module if it exists, otherwise return empty by default
+std::string OpenMPDialect::getTargetCpuFeatures(Operation *module) {
+  if (Attribute targetCpu = module->getAttr("omp.target_cpu_features"))
+    if (targetCpu.isa<mlir::StringAttr>())
+      return targetCpu.dyn_cast<StringAttr>().getValue().str();
+  return llvm::Twine{""}.str();
+}
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc"
 
Index: mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
===================================================================
--- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -36,6 +36,21 @@
     // Return the value of the omp.is_device attribute stored in the module if it
     // exists, otherwise return false by default
     static bool getIsDevice(Operation* module);
+
+    // Set the omp.target_cpu attribute on the module with the specified string
+    static void setTargetCpu(Operation* module, StringRef cpu);
+
+    // Return the value of the omp.target_cpu attribute stored in the module if it
+    // exists, otherwise return empty by default
+    static std::string getTargetCpu(Operation* module);
+
+    // Set the omp.target_cpu_features attribute on the module with
+    // the specified string
+    static void setTargetCpuFeatures(Operation* module, StringRef cpuFeatures);
+
+    // Return the value of the omp.target_cpu_features attribute stored in
+    // the module if it exists, otherwise return empty by default
+    static std::string getTargetCpuFeatures(Operation* module);
   }];
 }
 
Index: llvm/lib/TargetParser/TargetParser.cpp
===================================================================
--- llvm/lib/TargetParser/TargetParser.cpp
+++ llvm/lib/TargetParser/TargetParser.cpp
@@ -251,3 +251,212 @@
 
   return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
 }
+
+void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
+                                  StringMap<bool> &Features) {
+  // XXX - What does the member GPU mean if device name string passed here?
+  if (T.isAMDGCN()) {
+    switch (parseArchAMDGCN(GPU)) {
+    case GK_GFX1103:
+    case GK_GFX1102:
+    case GK_GFX1101:
+    case GK_GFX1100:
+      Features["ci-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot8-insts"] = true;
+      Features["dot9-insts"] = true;
+      Features["dot10-insts"] = true;
+      Features["dl-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["gfx8-insts"] = true;
+      Features["gfx9-insts"] = true;
+      Features["gfx10-insts"] = true;
+      Features["gfx10-3-insts"] = true;
+      Features["gfx11-insts"] = true;
+      break;
+    case GK_GFX1036:
+    case GK_GFX1035:
+    case GK_GFX1034:
+    case GK_GFX1033:
+    case GK_GFX1032:
+    case GK_GFX1031:
+    case GK_GFX1030:
+      Features["ci-insts"] = true;
+      Features["dot1-insts"] = true;
+      Features["dot2-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot6-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot10-insts"] = true;
+      Features["dl-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["gfx8-insts"] = true;
+      Features["gfx9-insts"] = true;
+      Features["gfx10-insts"] = true;
+      Features["gfx10-3-insts"] = true;
+      Features["s-memrealtime"] = true;
+      Features["s-memtime-inst"] = true;
+      break;
+    case GK_GFX1012:
+    case GK_GFX1011:
+      Features["dot1-insts"] = true;
+      Features["dot2-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot6-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot10-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX1013:
+    case GK_GFX1010:
+      Features["dl-insts"] = true;
+      Features["ci-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["gfx8-insts"] = true;
+      Features["gfx9-insts"] = true;
+      Features["gfx10-insts"] = true;
+      Features["s-memrealtime"] = true;
+      Features["s-memtime-inst"] = true;
+      break;
+    case GK_GFX940:
+      Features["gfx940-insts"] = true;
+      Features["fp8-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX90A:
+      Features["gfx90a-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX908:
+      Features["dot3-insts"] = true;
+      Features["dot4-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot6-insts"] = true;
+      Features["mai-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX906:
+      Features["dl-insts"] = true;
+      Features["dot1-insts"] = true;
+      Features["dot2-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot10-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX90C:
+    case GK_GFX909:
+    case GK_GFX904:
+    case GK_GFX902:
+    case GK_GFX900:
+      Features["gfx9-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX810:
+    case GK_GFX805:
+    case GK_GFX803:
+    case GK_GFX802:
+    case GK_GFX801:
+      Features["gfx8-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["s-memrealtime"] = true;
+      [[fallthrough]];
+    case GK_GFX705:
+    case GK_GFX704:
+    case GK_GFX703:
+    case GK_GFX702:
+    case GK_GFX701:
+    case GK_GFX700:
+      Features["ci-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX602:
+    case GK_GFX601:
+    case GK_GFX600:
+      Features["s-memtime-inst"] = true;
+      break;
+    case GK_NONE:
+      break;
+    default:
+      llvm_unreachable("Unhandled GPU!");
+    }
+  } else {
+    if (GPU.empty())
+      GPU = "r600";
+
+    switch (llvm::AMDGPU::parseArchR600(GPU)) {
+    case GK_CAYMAN:
+    case GK_CYPRESS:
+    case GK_RV770:
+    case GK_RV670:
+      // TODO: Add fp64 when implemented.
+      break;
+    case GK_TURKS:
+    case GK_CAICOS:
+    case GK_BARTS:
+    case GK_SUMO:
+    case GK_REDWOOD:
+    case GK_JUNIPER:
+    case GK_CEDAR:
+    case GK_RV730:
+    case GK_RV710:
+    case GK_RS880:
+    case GK_R630:
+    case GK_R600:
+      break;
+    default:
+      llvm_unreachable("Unhandled GPU!");
+    }
+  }
+}
+
+static bool isWave32Capable(StringRef GPU, const Triple &T) {
+  bool IsWave32Capable = false;
+  // XXX - What does the member GPU mean if device name string passed here?
+  if (T.isAMDGCN()) {
+    switch (parseArchAMDGCN(GPU)) {
+    case GK_GFX1103:
+    case GK_GFX1102:
+    case GK_GFX1101:
+    case GK_GFX1100:
+    case GK_GFX1036:
+    case GK_GFX1035:
+    case GK_GFX1034:
+    case GK_GFX1033:
+    case GK_GFX1032:
+    case GK_GFX1031:
+    case GK_GFX1030:
+    case GK_GFX1012:
+    case GK_GFX1011:
+    case GK_GFX1013:
+    case GK_GFX1010:
+      IsWave32Capable = true;
+      break;
+    default:
+      break;
+    }
+  }
+  return IsWave32Capable;
+}
+
+bool AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T,
+                                   StringMap<bool> &Features,
+                                   std::string &ErrorMsg) {
+  bool IsWave32Capable = isWave32Capable(GPU, T);
+  const bool IsNullGPU = GPU.empty();
+  // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
+  const bool HaveWave32 =
+      (IsWave32Capable || IsNullGPU) && Features.count("wavefrontsize32");
+  const bool HaveWave64 = Features.count("wavefrontsize64");
+  if (HaveWave32 && HaveWave64) {
+    ErrorMsg = "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
+    return false;
+  }
+  // Don't assume any wavesize with an unknown subtarget.
+  if (!IsNullGPU) {
+    // Default to wave32 if available, or wave64 if not
+    if (!HaveWave32 && !HaveWave64) {
+      StringRef DefaultWaveSizeFeature =
+          IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
+      Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
+    }
+  }
+  return true;
+}
Index: llvm/include/llvm/TargetParser/TargetParser.h
===================================================================
--- llvm/include/llvm/TargetParser/TargetParser.h
+++ llvm/include/llvm/TargetParser/TargetParser.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_TARGETPARSER_TARGETPARSER_H
 #define LLVM_TARGETPARSER_TARGETPARSER_H
 
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace llvm {
@@ -149,6 +150,14 @@
 
 IsaVersion getIsaVersion(StringRef GPU);
 
+/// Fills Features map with default values for given target GPU
+void fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
+                          StringMap<bool> &Features);
+
+/// Inserts wave size feature for given GPU into features map
+bool insertWaveSizeFeature(StringRef GPU, const Triple &T,
+                           StringMap<bool> &Features, std::string &ErrorMsg);
+
 } // namespace AMDGPU
 } // namespace llvm
 
Index: flang/test/Lower/OpenMP/target_cpu_features.f90
===================================================================
--- /dev/null
+++ flang/test/Lower/OpenMP/target_cpu_features.f90
@@ -0,0 +1,16 @@
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp %s -o - | FileCheck %s
+
+!===============================================================================
+! Target_Enter Simple
+!===============================================================================
+
+!CHECK: omp.target_cpu = "gfx908",
+!CHECK-SAME: omp.target_cpu_features = "+dot3-insts,+dot4-insts,+s-memtime-inst,
+!CHECK-SAME: +16-bit-insts,+s-memrealtime,+dot6-insts,+dl-insts,+wavefrontsize64,
+!CHECK-SAME: +gfx9-insts,+gfx8-insts,+ci-insts,+dot10-insts,+dot7-insts,
+!CHECK-SAME: +dot1-insts,+dot5-insts,+mai-insts,+dpp,+dot2-insts"
+!CHECK-LABEL: func.func @_QPomp_target_simple() {
+subroutine omp_target_simple
+end subroutine omp_target_simple
+
Index: flang/test/Driver/target-cpu-features.f90
===================================================================
--- flang/test/Driver/target-cpu-features.f90
+++ flang/test/Driver/target-cpu-features.f90
@@ -1,4 +1,4 @@
-! REQUIRES: aarch64-registered-target, x86-registered-target
+! REQUIRES: aarch64-registered-target, x86-registered-target, amdgpu-registered-target
 
 ! Test that -mcpu/march are used and that the -target-cpu and -target-features
 ! are also added to the fc1 command.
@@ -22,6 +22,8 @@
 ! RUN: %flang --target=x86_64h-linux-gnu -c %s -### 2>&1 \
 ! RUN: | FileCheck %s -check-prefix=CHECK-X86_64H
 
+! RUN: %flang --target=amdgcn-amd-amdhsa -mcpu=gfx908 -c %s -### 2>&1 \
+! RUN: | FileCheck %s -check-prefix=CHECK-AMDGPU
 
 ! Test that invalid cpu and features are ignored.
 
@@ -52,5 +54,7 @@
 ! CHECK-X86_64H: "-fc1" "-triple" "x86_64h-unknown-linux-gnu"
 ! CHECK-X86_64H-SAME: "-target-cpu" "x86-64" "-target-feature" "-rdrnd" "-target-feature" "-aes" "-target-feature" "-pclmul" "-target-feature" "-rtm" "-target-feature" "-fsgsbase"
 
+! CHECK-AMDGPU: "-fc1" "-triple" "amdgcn-amd-amdhsa"
+! CHECK-AMDGPU-SAME: "-target-cpu" "gfx908"
 ! CHECK-INVALID-CPU: 'supercpu' is not a recognized processor for this target (ignoring processor)
 ! CHECK-INVALID-FEATURE: '+superspeed' is not a recognized feature for this target (ignoring feature)
Index: flang/lib/Frontend/FrontendActions.cpp
===================================================================
--- flang/lib/Frontend/FrontendActions.cpp
+++ flang/lib/Frontend/FrontendActions.cpp
@@ -54,6 +54,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/TargetParser.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include <memory>
 
@@ -89,6 +90,54 @@
          (generateRtTypeTables() || true);
 }
 
+std::string CodeGenAction::getAllTargetFeatures() {
+  std::string allFeaturesStr;
+  CompilerInstance &ci = this->getInstance();
+  const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();
+  const llvm::Triple triple(targetOpts.triple);
+
+  // Clang does not append all target features to the clang -cc1 invocation.
+  // Some AMDGPU features are passed implicitly by the Clang frontend.
+  // That's why we need to extract implicit AMDGPU target features and add
+  // them to the target features specified by the user
+  if (triple.isAMDGPU()) {
+    llvm::StringRef cpu = targetOpts.cpu;
+    llvm::StringMap<bool> implicitFeaturesMap;
+    std::string errorMsg;
+    // Get the set of implicit target features
+    llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, implicitFeaturesMap);
+    if (!llvm::AMDGPU::insertWaveSizeFeature(cpu, triple, implicitFeaturesMap,
+                                             errorMsg)) {
+      llvm::SMDiagnostic err;
+      CompilerInstance &ci = this->getInstance();
+      err.print(errorMsg.data(), llvm::errs());
+      unsigned diagID = ci.getDiagnostics().getCustomDiagID(
+          clang::DiagnosticsEngine::Error, "Unsupported feature ID");
+      ci.getDiagnostics().Report(diagID);
+      return allFeaturesStr;
+    }
+
+    // Add target features specified by the user
+    for (auto &userFeature : targetOpts.featuresAsWritten) {
+      std::string userKeyString = userFeature.substr(1);
+      implicitFeaturesMap[userKeyString] = (userFeature[0] == '+');
+    }
+    llvm::SmallVector<std::string> featuresVec;
+    for (auto &implicitFeatureItem : implicitFeaturesMap) {
+      featuresVec.push_back(
+          (llvm::Twine(implicitFeatureItem.second ? "+" : "-") +
+           implicitFeatureItem.first().str())
+              .str());
+    }
+
+    allFeaturesStr = llvm::join(featuresVec, ",");
+  } else {
+    allFeaturesStr = llvm::join(targetOpts.featuresAsWritten.begin(),
+                                targetOpts.featuresAsWritten.end(), ",");
+  }
+  return allFeaturesStr;
+}
+
 static void setMLIRDataLayout(mlir::ModuleOp &mlirModule,
                               const llvm::DataLayout &dl) {
   mlir::MLIRContext *context = mlirModule.getContext();
@@ -178,13 +227,16 @@
   // Fetch module from lb, so we can set
   mlirModule = std::make_unique<mlir::ModuleOp>(lb.getModule());
 
+  setUpTargetMachine();
+
   if (ci.getInvocation().getFrontendOpts().features.IsEnabled(
           Fortran::common::LanguageFeature::OpenMP)) {
     mlir::omp::OpenMPDialect::setIsDevice(
         *mlirModule, ci.getInvocation().getLangOpts().OpenMPIsDevice);
+    mlir::omp::OpenMPDialect::setTargetCpu(*mlirModule, tm->getTargetCPU());
+    mlir::omp::OpenMPDialect::setTargetCpuFeatures(
+        *mlirModule, tm->getTargetFeatureString());
   }
-
-  setUpTargetMachine();
   const llvm::DataLayout &dl = tm->createDataLayout();
   setMLIRDataLayout(*mlirModule, dl);
 
@@ -603,8 +655,7 @@
       llvm::CodeGenOpt::getLevel(CGOpts.OptimizationLevel);
   assert(OptLevelOrNone && "Invalid optimization level!");
   llvm::CodeGenOpt::Level OptLevel = *OptLevelOrNone;
-  std::string featuresStr = llvm::join(targetOpts.featuresAsWritten.begin(),
-                                       targetOpts.featuresAsWritten.end(), ",");
+  std::string featuresStr = getAllTargetFeatures();
   tm.reset(theTarget->createTargetMachine(
       theTriple, /*CPU=*/targetOpts.cpu,
       /*Features=*/featuresStr, llvm::TargetOptions(),
Index: flang/include/flang/Frontend/FrontendActions.h
===================================================================
--- flang/include/flang/Frontend/FrontendActions.h
+++ flang/include/flang/Frontend/FrontendActions.h
@@ -208,6 +208,8 @@
   /// Runs the optimization (aka middle-end) pipeline on the LLVM module
   /// associated with this action.
   void runOptimizationPipeline(llvm::raw_pwrite_stream &os);
+  /// Produces the string which represents all target features
+  std::string getAllTargetFeatures();
 
 protected:
   CodeGenAction(BackendActionTy act) : action{act} {};
Index: clang/lib/Driver/ToolChains/Flang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Flang.cpp
+++ clang/lib/Driver/ToolChains/Flang.cpp
@@ -104,6 +104,10 @@
   switch (TC.getArch()) {
   default:
     break;
+  case llvm::Triple::r600:
+    [[fallthrough]];
+  case llvm::Triple::amdgcn:
+    [[fallthrough]];
   case llvm::Triple::aarch64:
     [[fallthrough]];
   case llvm::Triple::x86_64:
Index: clang/lib/Driver/ToolChains/CommonArgs.cpp
===================================================================
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -331,6 +331,9 @@
         .Case("aruba", "cayman")
         .Default(GPUName.str());
   }
+  if (Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
+    return getProcessorFromTargetID(T, A->getValue()).str();
+  }
   return "";
 }
 
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -179,191 +179,19 @@
 bool AMDGPUTargetInfo::initFeatureMap(
     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
     const std::vector<std::string> &FeatureVec) const {
-  const bool IsNullCPU = CPU.empty();
-  bool IsWave32Capable = false;
 
   using namespace llvm::AMDGPU;
-
-  // XXX - What does the member GPU mean if device name string passed here?
-  if (isAMDGCN(getTriple())) {
-    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
-    case GK_GFX1103:
-    case GK_GFX1102:
-    case GK_GFX1101:
-    case GK_GFX1100:
-      IsWave32Capable = true;
-      Features["ci-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot8-insts"] = true;
-      Features["dot9-insts"] = true;
-      Features["dot10-insts"] = true;
-      Features["dl-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["gfx10-3-insts"] = true;
-      Features["gfx11-insts"] = true;
-      break;
-    case GK_GFX1036:
-    case GK_GFX1035:
-    case GK_GFX1034:
-    case GK_GFX1033:
-    case GK_GFX1032:
-    case GK_GFX1031:
-    case GK_GFX1030:
-      IsWave32Capable = true;
-      Features["ci-insts"] = true;
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      Features["dl-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["gfx10-3-insts"] = true;
-      Features["s-memrealtime"] = true;
-      Features["s-memtime-inst"] = true;
-      break;
-    case GK_GFX1012:
-    case GK_GFX1011:
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX1013:
-    case GK_GFX1010:
-      IsWave32Capable = true;
-      Features["dl-insts"] = true;
-      Features["ci-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["s-memrealtime"] = true;
-      Features["s-memtime-inst"] = true;
-      break;
-    case GK_GFX940:
-      Features["gfx940-insts"] = true;
-      Features["fp8-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX90A:
-      Features["gfx90a-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX908:
-      Features["dot3-insts"] = true;
-      Features["dot4-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["mai-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX906:
-      Features["dl-insts"] = true;
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX90C:
-    case GK_GFX909:
-    case GK_GFX904:
-    case GK_GFX902:
-    case GK_GFX900:
-      Features["gfx9-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX810:
-    case GK_GFX805:
-    case GK_GFX803:
-    case GK_GFX802:
-    case GK_GFX801:
-      Features["gfx8-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["s-memrealtime"] = true;
-      [[fallthrough]];
-    case GK_GFX705:
-    case GK_GFX704:
-    case GK_GFX703:
-    case GK_GFX702:
-    case GK_GFX701:
-    case GK_GFX700:
-      Features["ci-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX602:
-    case GK_GFX601:
-    case GK_GFX600:
-      Features["s-memtime-inst"] = true;
-      break;
-    case GK_NONE:
-      break;
-    default:
-      llvm_unreachable("Unhandled GPU!");
-    }
-  } else {
-    if (CPU.empty())
-      CPU = "r600";
-
-    switch (llvm::AMDGPU::parseArchR600(CPU)) {
-    case GK_CAYMAN:
-    case GK_CYPRESS:
-    case GK_RV770:
-    case GK_RV670:
-      // TODO: Add fp64 when implemented.
-      break;
-    case GK_TURKS:
-    case GK_CAICOS:
-    case GK_BARTS:
-    case GK_SUMO:
-    case GK_REDWOOD:
-    case GK_JUNIPER:
-    case GK_CEDAR:
-    case GK_RV730:
-    case GK_RV710:
-    case GK_RS880:
-    case GK_R630:
-    case GK_R600:
-      break;
-    default:
-      llvm_unreachable("Unhandled GPU!");
-    }
-  }
-
+  fillAMDGPUFeatureMap(CPU, getTriple(), Features);
   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
     return false;
 
-  // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
-  const bool HaveWave32 =
-      (IsWave32Capable || IsNullCPU) && Features.count("wavefrontsize32");
-  const bool HaveWave64 = Features.count("wavefrontsize64");
-
   // TODO: Should move this logic into TargetParser
-  if (HaveWave32 && HaveWave64) {
-    Diags.Report(diag::err_invalid_feature_combination)
-        << "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
+  std::string ErrorMsg;
+  if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) {
+    Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg;
     return false;
   }
 
-  // Don't assume any wavesize with an unknown subtarget.
-  if (!IsNullCPU) {
-    // Default to wave32 if available, or wave64 if not
-    if (!HaveWave32 && !HaveWave64) {
-      StringRef DefaultWaveSizeFeature =
-          IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
-      Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
-    }
-  }
-
   return true;
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D145579: [Flang][AMDGPU][OpenMP] Save target features in OpenMP MLIR dialect

Reply via email to