[PATCH] D145579: [Flang][AMDGPU][OpenMP] Save target features in OpenMP MLIR dialect

Dominik Adamski via Phabricator via cfe-commits Wed, 08 Mar 2023 04:57:21 -0800

domada created this revision.
domada added reviewers: jsjodin, agozillon, skatrak, TIFitis, kiranktp, 
dpalermo, kiranchandramohan, NimishMishra, awarzynski.
domada added projects: Flang, OpenMP, MLIR, AMDGPU.
Herald added subscribers: sunshaoce, Moerafaat, zero9178, bzcheeseman, kosarev, 
sdasgup3, wenzhicui, wrengr, cota, teijeong, rdzhabarov, tatianashp, msifontes, 
jurahul, Kayjukh, grosul1, Joonsoo, kerbowa, liufengdb, aartbik, mgester, 
arpith-jacob, csigg, antiagainst, shauheen, rriddle, mehdi_amini, jdoerfert, 
thopre, guansong, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a reviewer: sscalpone.
Herald added a project: All.
domada requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, sstefan1, 
stephenneuendorffer, nicolasvasilache, MaskRay, wdng.
Herald added a reviewer: jdoerfert.
Herald added a reviewer: nicolasvasilache.
Herald added projects: clang, LLVM.


Scope of changes:

1. Add AMDGPU as one of the valid targets for Flang.
2. Extract the code that computes AMDGPU target features into the LLVM TargetParser so that Clang and Flang can share it.
3. Store the target CPU and the target features as module attributes of the OpenMP MLIR dialect.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D145579

Files:
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/Flang.cpp
  flang/include/flang/Frontend/FrontendActions.h
  flang/lib/Frontend/FrontendActions.cpp
  flang/test/Driver/target-cpu-features.f90
  flang/test/Lower/OpenMP/target_cpu_features.f90
  llvm/include/llvm/TargetParser/TargetParser.h
  llvm/lib/TargetParser/TargetParser.cpp
  mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
  mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp

Index: mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
===================================================================
--- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1437,6 +1437,39 @@
   return false;
 }
 
+// Store `cpu` on `module` as a string attribute named omp.target_cpu. The
+// attribute value is created in the module's own MLIR context.
+void OpenMPDialect::setTargetCpu(Operation *module, llvm::StringRef cpu) {
+  MLIRContext *ctx = module->getContext();
+  module->setAttr("omp.target_cpu", StringAttr::get(ctx, cpu));
+}
+
+// Read the omp.target_cpu attribute from the module. An empty string is
+// returned when the attribute is missing or is not a StringAttr.
+std::string OpenMPDialect::getTargetCpu(Operation *module) {
+  auto cpuAttr = module->getAttrOfType<StringAttr>("omp.target_cpu");
+  if (!cpuAttr)
+    return std::string();
+  return cpuAttr.getValue().str();
+}
+
+// Store `cpuFeatures` on `module` as a string attribute named
+// omp.target_cpu_features, created in the module's own MLIR context.
+void OpenMPDialect::setTargetCpuFeatures(Operation *module,
+                                         llvm::StringRef cpuFeatures) {
+  MLIRContext *ctx = module->getContext();
+  auto featuresAttr = StringAttr::get(ctx, cpuFeatures);
+  module->setAttr("omp.target_cpu_features", featuresAttr);
+}
+
+// Read the omp.target_cpu_features attribute from the module. An empty string
+// is returned when the attribute is missing or is not a StringAttr.
+std::string OpenMPDialect::getTargetCpuFeatures(Operation *module) {
+  auto attr = module->getAttrOfType<StringAttr>("omp.target_cpu_features");
+  if (!attr)
+    return std::string();
+  return attr.getValue().str();
+}
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc"
 
Index: mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
===================================================================
--- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -36,6 +36,21 @@
     // Return the value of the omp.is_device attribute stored in the module if it
     // exists, otherwise return false by default
     static bool getIsDevice(Operation* module);
+
+    // Set the omp.target_cpu attribute on the module with the specified string
+    static void setTargetCpu(Operation* module, StringRef cpu);
+
+    // Return the value of the omp.target_cpu attribute stored in the module if it
+    // exists, otherwise return empty by default
+    static std::string getTargetCpu(Operation* module);
+
+    // Set the omp.target_cpu_features attribute on the module with
+    // the specified string
+    static void setTargetCpuFeatures(Operation* module, StringRef cpuFeatures);
+
+    // Return the value of the omp.target_cpu_features attribute stored in
+    // the module if it exists, otherwise return empty by default
+    static std::string getTargetCpuFeatures(Operation* module);
   }];
 }
 
Index: llvm/lib/TargetParser/TargetParser.cpp
===================================================================
--- llvm/lib/TargetParser/TargetParser.cpp
+++ llvm/lib/TargetParser/TargetParser.cpp
@@ -251,3 +251,212 @@
 
   return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
 }
+
+void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
+                                  StringMap<bool> &Features) {
+  // XXX - What does the member GPU mean if device name string passed here?
+  if (T.isAMDGCN()) {
+    switch (parseArchAMDGCN(GPU)) {
+    case GK_GFX1103:
+    case GK_GFX1102:
+    case GK_GFX1101:
+    case GK_GFX1100:
+      Features["ci-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot8-insts"] = true;
+      Features["dot9-insts"] = true;
+      Features["dot10-insts"] = true;
+      Features["dl-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["gfx8-insts"] = true;
+      Features["gfx9-insts"] = true;
+      Features["gfx10-insts"] = true;
+      Features["gfx10-3-insts"] = true;
+      Features["gfx11-insts"] = true;
+      break;
+    case GK_GFX1036:
+    case GK_GFX1035:
+    case GK_GFX1034:
+    case GK_GFX1033:
+    case GK_GFX1032:
+    case GK_GFX1031:
+    case GK_GFX1030:
+      Features["ci-insts"] = true;
+      Features["dot1-insts"] = true;
+      Features["dot2-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot6-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot10-insts"] = true;
+      Features["dl-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["gfx8-insts"] = true;
+      Features["gfx9-insts"] = true;
+      Features["gfx10-insts"] = true;
+      Features["gfx10-3-insts"] = true;
+      Features["s-memrealtime"] = true;
+      Features["s-memtime-inst"] = true;
+      break;
+    case GK_GFX1012:
+    case GK_GFX1011:
+      Features["dot1-insts"] = true;
+      Features["dot2-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot6-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot10-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX1013:
+    case GK_GFX1010:
+      Features["dl-insts"] = true;
+      Features["ci-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["gfx8-insts"] = true;
+      Features["gfx9-insts"] = true;
+      Features["gfx10-insts"] = true;
+      Features["s-memrealtime"] = true;
+      Features["s-memtime-inst"] = true;
+      break;
+    case GK_GFX940:
+      Features["gfx940-insts"] = true;
+      Features["fp8-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX90A:
+      Features["gfx90a-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX908:
+      Features["dot3-insts"] = true;
+      Features["dot4-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot6-insts"] = true;
+      Features["mai-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX906:
+      Features["dl-insts"] = true;
+      Features["dot1-insts"] = true;
+      Features["dot2-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot10-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX90C:
+    case GK_GFX909:
+    case GK_GFX904:
+    case GK_GFX902:
+    case GK_GFX900:
+      Features["gfx9-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX810:
+    case GK_GFX805:
+    case GK_GFX803:
+    case GK_GFX802:
+    case GK_GFX801:
+      Features["gfx8-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["s-memrealtime"] = true;
+      [[fallthrough]];
+    case GK_GFX705:
+    case GK_GFX704:
+    case GK_GFX703:
+    case GK_GFX702:
+    case GK_GFX701:
+    case GK_GFX700:
+      Features["ci-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX602:
+    case GK_GFX601:
+    case GK_GFX600:
+      Features["s-memtime-inst"] = true;
+      break;
+    case GK_NONE:
+      break;
+    default:
+      llvm_unreachable("Unhandled GPU!");
+    }
+  } else {
+    if (GPU.empty())
+      GPU = "r600";
+    switch (parseArchR600(GPU)) {
+    case GK_CAYMAN:
+    case GK_CYPRESS:
+    case GK_RV770:
+    case GK_RV670:
+      // TODO: Add fp64 when implemented.
+      break;
+    case GK_TURKS:
+    case GK_CAICOS:
+    case GK_BARTS:
+    case GK_SUMO:
+    case GK_REDWOOD:
+    case GK_JUNIPER:
+    case GK_CEDAR:
+    case GK_RV730:
+    case GK_RV710:
+    case GK_RS880:
+    case GK_R630:
+    case GK_R600:
+    case GK_NONE: // Unrecognized name: add no features, like the AMDGCN path.
+      break;
+    default:
+      llvm_unreachable("Unhandled GPU!");
+    }
+  }
+}
+
+// Tell whether GPU names one of the GFX10 or GFX11 AMDGCN processors listed
+// below. Triples that are not AMDGCN and every other processor kind yield
+// false.
+static bool isWave32Capable(StringRef GPU, const Triple &T) {
+  // XXX - What does the member GPU mean if device name string passed here?
+  if (!T.isAMDGCN())
+    return false;
+  switch (parseArchAMDGCN(GPU)) {
+  case GK_GFX1103:
+  case GK_GFX1102:
+  case GK_GFX1101:
+  case GK_GFX1100:
+  case GK_GFX1036:
+  case GK_GFX1035:
+  case GK_GFX1034:
+  case GK_GFX1033:
+  case GK_GFX1032:
+  case GK_GFX1031:
+  case GK_GFX1030:
+  case GK_GFX1012:
+  case GK_GFX1011:
+  case GK_GFX1013:
+  case GK_GFX1010:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T,
+                                   StringMap<bool> &Features,
+                                   std::string &ErrorMsg) {
+  const bool NoGPU = GPU.empty();
+  const bool CanWave32 = isWave32Capable(GPU, T);
+  // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
+  // A wave32 entry only counts if the GPU can run wave32 or no GPU is named.
+  const bool Wave32Set =
+      Features.count("wavefrontsize32") && (CanWave32 || NoGPU);
+  const bool Wave64Set = Features.count("wavefrontsize64");
+  if (Wave32Set && Wave64Set) {
+    ErrorMsg = "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
+    return false;
+  }
+  // No wave size is assumed for an unnamed GPU (unknown subtarget). A named
+  // GPU with neither entry counted above defaults to wave32 when it is
+  // capable of it and to wave64 otherwise.
+  if (NoGPU || Wave32Set || Wave64Set)
+    return true;
+  StringRef DefaultFeature =
+      CanWave32 ? "wavefrontsize32" : "wavefrontsize64";
+  Features.insert(std::make_pair(DefaultFeature, true));
+  return true;
+}
Index: llvm/include/llvm/TargetParser/TargetParser.h
===================================================================
--- llvm/include/llvm/TargetParser/TargetParser.h
+++ llvm/include/llvm/TargetParser/TargetParser.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_TARGETPARSER_TARGETPARSER_H
 #define LLVM_TARGETPARSER_TARGETPARSER_H
 
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace llvm {
@@ -149,6 +150,14 @@
 
 IsaVersion getIsaVersion(StringRef GPU);
 
+/// Fills Features map with default values for given target GPU
+void fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
+                          StringMap<bool> &Features);
+
+/// Inserts wave size feature for given GPU into features map
+bool insertWaveSizeFeature(StringRef GPU, const Triple &T,
+                           StringMap<bool> &Features, std::string &ErrorMsg);
+
 } // namespace AMDGPU
 } // namespace llvm
 
Index: flang/test/Lower/OpenMP/target_cpu_features.f90
===================================================================
--- /dev/null
+++ flang/test/Lower/OpenMP/target_cpu_features.f90
@@ -0,0 +1,16 @@
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp %s -o - | FileCheck %s
+
+!===============================================================================
+! Target_Enter Simple
+!===============================================================================
+
+!CHECK: omp.target_cpu = "gfx908",
+!CHECK-SAME: omp.target_cpu_features = "+dot3-insts,+dot4-insts,+s-memtime-inst,
+!CHECK-SAME: +16-bit-insts,+s-memrealtime,+dot6-insts,+dl-insts,+wavefrontsize64,
+!CHECK-SAME: +gfx9-insts,+gfx8-insts,+ci-insts,+dot10-insts,+dot7-insts,
+!CHECK-SAME: +dot1-insts,+dot5-insts,+mai-insts,+dpp,+dot2-insts"
+!CHECK-LABEL: func.func @_QPomp_target_simple() {
+subroutine omp_target_simple
+end subroutine omp_target_simple
+
Index: flang/test/Driver/target-cpu-features.f90
===================================================================
--- flang/test/Driver/target-cpu-features.f90
+++ flang/test/Driver/target-cpu-features.f90
@@ -1,4 +1,4 @@
-! REQUIRES: aarch64-registered-target, x86-registered-target
+! REQUIRES: aarch64-registered-target, x86-registered-target, amdgpu-registered-target
 
 ! Test that -mcpu/march are used and that the -target-cpu and -target-features
 ! are also added to the fc1 command.
@@ -22,6 +22,8 @@
 ! RUN: %flang --target=x86_64h-linux-gnu -c %s -### 2>&1 \
 ! RUN: | FileCheck %s -check-prefix=CHECK-X86_64H
 
+! RUN: %flang --target=amdgcn-amd-amdhsa -mcpu=gfx908 -c %s -### 2>&1 \
+! RUN: | FileCheck %s -check-prefix=CHECK-AMDGPU
 
 ! Test that invalid cpu and features are ignored.
 
@@ -52,5 +54,7 @@
 ! CHECK-X86_64H: "-fc1" "-triple" "x86_64h-unknown-linux-gnu"
 ! CHECK-X86_64H-SAME: "-target-cpu" "x86-64" "-target-feature" "-rdrnd" "-target-feature" "-aes" "-target-feature" "-pclmul" "-target-feature" "-rtm" "-target-feature" "-fsgsbase"
 
+! CHECK-AMDGPU: "-fc1" "-triple" "amdgcn-amd-amdhsa"
+! CHECK-AMDGPU-SAME: "-target-cpu" "gfx908"
 ! CHECK-INVALID-CPU: 'supercpu' is not a recognized processor for this target (ignoring processor)
 ! CHECK-INVALID-FEATURE: '+superspeed' is not a recognized feature for this target (ignoring feature)
Index: flang/lib/Frontend/FrontendActions.cpp
===================================================================
--- flang/lib/Frontend/FrontendActions.cpp
+++ flang/lib/Frontend/FrontendActions.cpp
@@ -54,6 +54,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/TargetParser.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include <memory>
 
@@ -89,6 +90,54 @@
          (generateRtTypeTables() || true);
 }
 
+/// Produce the comma-separated list of "+name"/"-name" target features used
+/// to set up the target machine. For AMDGPU triples the CPU's implicit
+/// features are merged with the user's; if the wave size features conflict,
+/// an error is reported and an empty string is returned.
+std::string CodeGenAction::getAllTargetFeatures() {
+  CompilerInstance &ci = this->getInstance();
+  const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();
+  const llvm::Triple triple(targetOpts.triple);
+
+  // Other targets need nothing beyond the features the user wrote.
+  if (!triple.isAMDGPU())
+    return llvm::join(targetOpts.featuresAsWritten.begin(),
+                      targetOpts.featuresAsWritten.end(), ",");
+
+  // Clang does not append all target features to the clang -cc1 invocation.
+  // Some AMDGPU features are passed implicitly by the Clang frontend.
+  // That's why we need to extract implicit AMDGPU target features and add
+  // them to the target features specified by the user
+  llvm::StringRef cpu = targetOpts.cpu;
+  llvm::StringMap<bool> featuresMap;
+  llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, featuresMap);
+
+  // Apply the user's features before choosing a wave size, so an explicit
+  // wavefrontsize32/64 suppresses the default and a conflict is diagnosed.
+  for (const auto &userFeature : targetOpts.featuresAsWritten) {
+    if (userFeature.size() < 2)
+      continue; // Too short to hold a '+'/'-' prefix and a feature name.
+    featuresMap[userFeature.substr(1)] = (userFeature[0] == '+');
+  }
+
+  std::string errorMsg;
+  if (!llvm::AMDGPU::insertWaveSizeFeature(cpu, triple, featuresMap,
+                                           errorMsg)) {
+    // Surface the reason through the diagnostics engine.
+    unsigned diagID = ci.getDiagnostics().getCustomDiagID(
+        clang::DiagnosticsEngine::Error, "Unsupported feature ID: %0");
+    ci.getDiagnostics().Report(diagID) << errorMsg;
+    return std::string();
+  }
+
+  // Emit every map entry as "+name" or "-name", in the map's iteration order.
+  llvm::SmallVector<std::string> featuresVec;
+  for (const auto &feature : featuresMap)
+    featuresVec.push_back(
+        (llvm::Twine(feature.second ? "+" : "-") + feature.first()).str());
+  return llvm::join(featuresVec, ",");
+}
+
 static void setMLIRDataLayout(mlir::ModuleOp &mlirModule,
                               const llvm::DataLayout &dl) {
   mlir::MLIRContext *context = mlirModule.getContext();
@@ -178,13 +227,16 @@
   // Fetch module from lb, so we can set
   mlirModule = std::make_unique<mlir::ModuleOp>(lb.getModule());
 
+  setUpTargetMachine();
+
   if (ci.getInvocation().getFrontendOpts().features.IsEnabled(
           Fortran::common::LanguageFeature::OpenMP)) {
     mlir::omp::OpenMPDialect::setIsDevice(
         *mlirModule, ci.getInvocation().getLangOpts().OpenMPIsDevice);
+    mlir::omp::OpenMPDialect::setTargetCpu(*mlirModule, tm->getTargetCPU());
+    mlir::omp::OpenMPDialect::setTargetCpuFeatures(
+        *mlirModule, tm->getTargetFeatureString());
   }
-
-  setUpTargetMachine();
   const llvm::DataLayout &dl = tm->createDataLayout();
   setMLIRDataLayout(*mlirModule, dl);
 
@@ -603,8 +655,7 @@
       llvm::CodeGenOpt::getLevel(CGOpts.OptimizationLevel);
   assert(OptLevelOrNone && "Invalid optimization level!");
   llvm::CodeGenOpt::Level OptLevel = *OptLevelOrNone;
-  std::string featuresStr = llvm::join(targetOpts.featuresAsWritten.begin(),
-                                       targetOpts.featuresAsWritten.end(), ",");
+  std::string featuresStr = getAllTargetFeatures();
   tm.reset(theTarget->createTargetMachine(
       theTriple, /*CPU=*/targetOpts.cpu,
       /*Features=*/featuresStr, llvm::TargetOptions(),
Index: flang/include/flang/Frontend/FrontendActions.h
===================================================================
--- flang/include/flang/Frontend/FrontendActions.h
+++ flang/include/flang/Frontend/FrontendActions.h
@@ -208,6 +208,8 @@
   /// Runs the optimization (aka middle-end) pipeline on the LLVM module
   /// associated with this action.
   void runOptimizationPipeline(llvm::raw_pwrite_stream &os);
+  /// Produces the string which represents all target features
+  std::string getAllTargetFeatures();
 
 protected:
   CodeGenAction(BackendActionTy act) : action{act} {};
Index: clang/lib/Driver/ToolChains/Flang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Flang.cpp
+++ clang/lib/Driver/ToolChains/Flang.cpp
@@ -104,6 +104,10 @@
   switch (TC.getArch()) {
   default:
     break;
+  case llvm::Triple::r600:
+    [[fallthrough]];
+  case llvm::Triple::amdgcn:
+    [[fallthrough]];
   case llvm::Triple::aarch64:
     [[fallthrough]];
   case llvm::Triple::x86_64:
Index: clang/lib/Driver/ToolChains/CommonArgs.cpp
===================================================================
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -331,6 +331,9 @@
         .Case("aruba", "cayman")
         .Default(GPUName.str());
   }
+  if (Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
+    return getProcessorFromTargetID(T, A->getValue()).str();
+  }
   return "";
 }
 
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -179,191 +179,20 @@
 bool AMDGPUTargetInfo::initFeatureMap(
     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
     const std::vector<std::string> &FeatureVec) const {
-  const bool IsNullCPU = CPU.empty();
-  bool IsWave32Capable = false;
 
   using namespace llvm::AMDGPU;
-
-  // XXX - What does the member GPU mean if device name string passed here?
-  if (isAMDGCN(getTriple())) {
-    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
-    case GK_GFX1103:
-    case GK_GFX1102:
-    case GK_GFX1101:
-    case GK_GFX1100:
-      IsWave32Capable = true;
-      Features["ci-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot8-insts"] = true;
-      Features["dot9-insts"] = true;
-      Features["dot10-insts"] = true;
-      Features["dl-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["gfx10-3-insts"] = true;
-      Features["gfx11-insts"] = true;
-      break;
-    case GK_GFX1036:
-    case GK_GFX1035:
-    case GK_GFX1034:
-    case GK_GFX1033:
-    case GK_GFX1032:
-    case GK_GFX1031:
-    case GK_GFX1030:
-      IsWave32Capable = true;
-      Features["ci-insts"] = true;
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      Features["dl-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["gfx10-3-insts"] = true;
-      Features["s-memrealtime"] = true;
-      Features["s-memtime-inst"] = true;
-      break;
-    case GK_GFX1012:
-    case GK_GFX1011:
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX1013:
-    case GK_GFX1010:
-      IsWave32Capable = true;
-      Features["dl-insts"] = true;
-      Features["ci-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["s-memrealtime"] = true;
-      Features["s-memtime-inst"] = true;
-      break;
-    case GK_GFX940:
-      Features["gfx940-insts"] = true;
-      Features["fp8-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX90A:
-      Features["gfx90a-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX908:
-      Features["dot3-insts"] = true;
-      Features["dot4-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["mai-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX906:
-      Features["dl-insts"] = true;
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX90C:
-    case GK_GFX909:
-    case GK_GFX904:
-    case GK_GFX902:
-    case GK_GFX900:
-      Features["gfx9-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX810:
-    case GK_GFX805:
-    case GK_GFX803:
-    case GK_GFX802:
-    case GK_GFX801:
-      Features["gfx8-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["s-memrealtime"] = true;
-      [[fallthrough]];
-    case GK_GFX705:
-    case GK_GFX704:
-    case GK_GFX703:
-    case GK_GFX702:
-    case GK_GFX701:
-    case GK_GFX700:
-      Features["ci-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX602:
-    case GK_GFX601:
-    case GK_GFX600:
-      Features["s-memtime-inst"] = true;
-      break;
-    case GK_NONE:
-      break;
-    default:
-      llvm_unreachable("Unhandled GPU!");
-    }
-  } else {
-    if (CPU.empty())
-      CPU = "r600";
-
-    switch (llvm::AMDGPU::parseArchR600(CPU)) {
-    case GK_CAYMAN:
-    case GK_CYPRESS:
-    case GK_RV770:
-    case GK_RV670:
-      // TODO: Add fp64 when implemented.
-      break;
-    case GK_TURKS:
-    case GK_CAICOS:
-    case GK_BARTS:
-    case GK_SUMO:
-    case GK_REDWOOD:
-    case GK_JUNIPER:
-    case GK_CEDAR:
-    case GK_RV730:
-    case GK_RV710:
-    case GK_RS880:
-    case GK_R630:
-    case GK_R600:
-      break;
-    default:
-      llvm_unreachable("Unhandled GPU!");
-    }
-  }
-
+  fillAMDGPUFeatureMap(CPU, getTriple(), Features);
   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
     return false;
 
-  // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
-  const bool HaveWave32 =
-      (IsWave32Capable || IsNullCPU) && Features.count("wavefrontsize32");
-  const bool HaveWave64 = Features.count("wavefrontsize64");
 
   // TODO: Should move this logic into TargetParser
-  if (HaveWave32 && HaveWave64) {
-    Diags.Report(diag::err_invalid_feature_combination)
-        << "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
+  std::string ErrorMsg;
+  if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) {
+    Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg;
     return false;
   }
 
-  // Don't assume any wavesize with an unknown subtarget.
-  if (!IsNullCPU) {
-    // Default to wave32 if available, or wave64 if not
-    if (!HaveWave32 && !HaveWave64) {
-      StringRef DefaultWaveSizeFeature =
-          IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
-      Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
-    }
-  }
-
   return true;
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D145579: [Flang][AMDGPU][OpenMP] Save target features in OpenMP MLIR dialect

Reply via email to