yaxunl updated this revision to Diff 265025.
yaxunl retitled this revision from "[HIP] Support -offloading-target-id" to 
"[HIP] Support target id by --offload-arch".
yaxunl edited the summary of this revision.
yaxunl added a comment.

rebased the patch and revised by passing target id by `--offload-arch`.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60620/new/

https://reviews.llvm.org/D60620

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Basic/HIP.h
  clang/lib/Basic/CMakeLists.txt
  clang/lib/Basic/HIP.cpp
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/Driver/hip-invalid-target-id.hip
  clang/test/Driver/hip-target-id.hip

Index: clang/test/Driver/hip-target-id.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/hip-target-id.hip
@@ -0,0 +1,56 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx908+xnack+sramecc \
+// RUN:   --offload-arch=gfx908+xnack-sramecc \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck %s
+
+// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: {{.*}} "-target-cpu" "gfx908"
+
+// CHECK: [[OPT:".*opt"]] {{".*-gfx908-linked.*bc"}} "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: "-mcpu=gfx908"
+// CHECK-SAME: "-o" [[OPT_906_BC:".*-gfx908-optimized.*bc"]]
+
+// CHECK: [[LLC: ".*llc"]] [[OPT_906_BC]]
+// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-mcpu=gfx908"
+// CHECK-SAME: "-o" {{".*-gfx908-.*o"}}
+
+// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" "-mxnack" "-msram-ecc"
+
+// CHECK: [[OPT]] {{".*-gfx908\+xnack\+sramecc.*bc"}} "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: "-mcpu=gfx908"
+// CHECK-SAME: "-o" [[OPT_906XE_BC:".*-gfx908\+xnack\+sramecc.*bc"]]
+
+// CHECK: [[LLC]] [[OPT_906XE_BC]]
+// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-mcpu=gfx908"
+// CHECK-SAME: "-o" {{".*-gfx908\+xnack\+sramecc.*o"}}
+
+// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" "-mxnack" "-mno-sram-ecc"
+
+// CHECK: [[OPT]] {{".*-gfx908\+xnack.*bc"}} "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: "-mcpu=gfx908"
+// CHECK-SAME: "-o" [[OPT_906XN_BC:".*-gfx908\+xnack.*bc"]]
+
+// CHECK: [[LLC]] [[OPT_906XN_BC]]
+// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-mcpu=gfx908"
+// CHECK-SAME: "-o" {{".*-gfx908\+xnack.*o"}}
+
+
+// CHECK: {{".*clang-offload-bundler"}}
+// CHECK-SAME: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx908,hip-amdgcn-amd-amdhsa-gfx908+xnack+sramecc,hip-amdgcn-amd-amdhsa-gfx908+xnack-sramecc"
Index: clang/test/Driver/hip-invalid-target-id.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/hip-invalid-target-id.hip
@@ -0,0 +1,48 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx908xnack \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=NOPLUS %s
+
+// NOPLUS: error: Unsupported CUDA gpu architecture
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx908+sramecc+xnack \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=ORDER %s
+
+// ORDER: error: Invalid HIP offloading target id: gfx908+sramecc+xnack
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx908+unknown \
+// RUN:   --offload-arch=gfx908+sramecc+unknown \
+// RUN:   --offload-arch=gfx900+xnack \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=UNK %s
+
+// UNK: error: Invalid HIP offloading target id: gfx908+unknown
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx908+sramecc+unknown \
+// RUN:   --offload-arch=gfx900+xnack \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=MIXED %s
+
+// MIXED: error: Invalid HIP offloading target id: gfx908+sramecc+unknown
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx900+xnack \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=UNSUP %s
+
+// UNSUP: error: Invalid HIP offloading target id: gfx900+xnack
+
+
Index: clang/lib/Driver/ToolChains/HIP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -11,6 +11,7 @@
 #include "CommonArgs.h"
 #include "InputInfo.h"
 #include "clang/Basic/Cuda.h"
+#include "clang/Basic/HIP.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
@@ -117,6 +118,9 @@
     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
     const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
     llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
+  auto Pos = SubArchName.find_first_of("+-");
+  if (Pos != SubArchName.npos)
+    SubArchName = SubArchName.substr(0, Pos);
   // Construct opt command.
   ArgStringList OptArgs;
   // The input to opt is the output from llvm-link.
@@ -145,6 +149,9 @@
     const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
     llvm::StringRef OutputFilePrefix, const char *InputFileName,
     bool OutputIsAsm) const {
+  auto Pos = SubArchName.find_first_of("+-");
+  if (Pos != SubArchName.npos)
+    SubArchName = SubArchName.substr(0, Pos);
   // Construct llc command.
   ArgStringList LlcArgs;
   // The input to llc is the output from opt.
@@ -282,6 +289,12 @@
   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
 
   StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
+  StringRef FeatureStr;
+  auto Pos = GpuArch.find_first_of("+-");
+  if (Pos != GpuArch.npos) {
+    FeatureStr = GpuArch.drop_front(Pos);
+    GpuArch = GpuArch.substr(0, Pos);
+  }
   assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
   (void) GpuArch;
   assert(DeviceOffloadingKind == Action::OFK_HIP &&
@@ -290,6 +303,38 @@
 
   CC1Args.push_back("-target-cpu");
   CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
+
+  // If target id contains +feature, map it to true.
+  // If target id contains -feature, map it to false.
+  // If target id does not contain feature (default), do not map it.
+  llvm::StringMap<bool> FeatureMap;
+  while (!FeatureStr.empty()) {
+    auto Pos = FeatureStr.find_first_of("+-", 1);
+    if (Pos == FeatureStr.npos)
+      Pos = FeatureStr.size();
+    FeatureMap[FeatureStr.substr(1, Pos - 1)] = FeatureStr.front() == '+';
+    FeatureStr = FeatureStr.drop_front(Pos);
+  }
+
+  // Iterate through all possible target id features for the given GPU.
+  // If it is mapped to true, pass -mfeature to clang -cc1.
+  // If it is mapped to false, pass -mno-feature to clang -cc1.
+  // If it is not in the map (default), do not pass it to clang -cc1.
+  for (auto Feature : HIP::getAllPossibleTargetIdFeatures(GpuArch)) {
+    std::string Opt = "-m";
+    auto Pos = FeatureMap.find(Feature);
+    if (Pos == FeatureMap.end())
+      continue;
+    auto FeatureOptName = Feature;
+    if (Feature == "sramecc")
+      FeatureOptName = "sram-ecc";
+    if (Pos->second)
+      Opt = Opt + FeatureOptName.str();
+    else
+      Opt = Opt + "no-" + FeatureOptName.str();
+    CC1Args.push_back(DriverArgs.MakeArgStringRef(Opt));
+  }
+
   CC1Args.push_back("-fcuda-is-device");
 
   if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -45,6 +45,7 @@
 #include "ToolChains/TCE.h"
 #include "ToolChains/WebAssembly.h"
 #include "ToolChains/XCore.h"
+#include "clang/Basic/HIP.h"
 #include "clang/Basic/Version.h"
 #include "clang/Config/config.h"
 #include "clang/Driver/Action.h"
@@ -2355,8 +2356,20 @@
     bool EmitLLVM = false;
     bool EmitAsm = false;
 
+    /// Id to identify each device compilation. For CUDA it is simply the
+    /// GPU arch string. For HIP it is either the GPU arch string or GPU
+    /// arch string plus feature strings delimited by a plus sign, e.g.
+    /// gfx906+xnack.
+    struct TargetId {
+      /// Target id string which is persistent throughout the compilation.
+      const char *Id;
+      TargetId(CudaArch Arch) { Id = CudaArchToString(Arch); }
+      TargetId(const char *Id) : Id(Id) {}
+      operator const char *() { return Id; }
+      operator StringRef() { return StringRef(Id); }
+    };
     /// List of GPU architectures to use in this compilation.
-    SmallVector<CudaArch, 4> GpuArchList;
+    SmallVector<TargetId, 4> GpuArchList;
 
     /// The CUDA actions for the current input.
     ActionList CudaDeviceActions;
@@ -2439,7 +2452,7 @@
 
         for (auto Arch : GpuArchList) {
           CudaDeviceActions.push_back(UA);
-          UA->registerDependentActionInfo(ToolChains[0], CudaArchToString(Arch),
+          UA->registerDependentActionInfo(ToolChains[0], Arch,
                                           AssociatedOffloadKind);
         }
         return ABRT_Success;
@@ -2450,10 +2463,9 @@
 
     void appendTopLevelActions(ActionList &AL) override {
       // Utility to append actions to the top level list.
-      auto AddTopLevel = [&](Action *A, CudaArch BoundArch) {
+      auto AddTopLevel = [&](Action *A, TargetId TargetId) {
         OffloadAction::DeviceDependences Dep;
-        Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch),
-                AssociatedOffloadKind);
+        Dep.add(*A, *ToolChains.front(), TargetId, AssociatedOffloadKind);
         AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
       };
 
@@ -2481,6 +2493,8 @@
       CudaDeviceActions.clear();
     }
 
+    virtual bool IsValidOffloadArch(StringRef Arch) const = 0;
+
     bool initialize() override {
       assert(AssociatedOffloadKind == Action::OFK_Cuda ||
              AssociatedOffloadKind == Action::OFK_HIP);
@@ -2528,7 +2542,7 @@
       EmitAsm = Args.getLastArg(options::OPT_S);
 
       // Collect all cuda_gpu_arch parameters, removing duplicates.
-      std::set<CudaArch> GpuArchs;
+      std::set<StringRef> GpuArchs;
       bool Error = false;
       for (Arg *A : Args) {
         if (!(A->getOption().matches(options::OPT_offload_arch_EQ) ||
@@ -2542,21 +2556,19 @@
           GpuArchs.clear();
           continue;
         }
-        CudaArch Arch = StringToCudaArch(ArchStr);
-        if (Arch == CudaArch::UNKNOWN) {
-          C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+        if (!IsValidOffloadArch(ArchStr)) {
           Error = true;
         } else if (A->getOption().matches(options::OPT_offload_arch_EQ))
-          GpuArchs.insert(Arch);
+          GpuArchs.insert(ArchStr);
         else if (A->getOption().matches(options::OPT_no_offload_arch_EQ))
-          GpuArchs.erase(Arch);
+          GpuArchs.erase(ArchStr);
         else
           llvm_unreachable("Unexpected option.");
       }
 
       // Collect list of GPUs remaining in the set.
-      for (CudaArch Arch : GpuArchs)
-        GpuArchList.push_back(Arch);
+      for (auto Arch : GpuArchs)
+        GpuArchList.push_back(Arch.data());
 
       // Default to sm_20 which is the lowest common denominator for
       // supported GPUs.  sm_20 code should work correctly, if
@@ -2578,6 +2590,15 @@
       DefaultCudaArch = CudaArch::SM_20;
     }
 
+    bool IsValidOffloadArch(StringRef ArchStr) const override {
+      CudaArch Arch = StringToCudaArch(ArchStr);
+      if (Arch == CudaArch::UNKNOWN) {
+        C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+        return false;
+      }
+      return true;
+    }
+
     ActionBuilderReturnCode
     getDeviceDependences(OffloadAction::DeviceDependences &DA,
                          phases::ID CurPhase, phases::ID FinalPhase,
@@ -2637,8 +2658,7 @@
 
           for (auto &A : {AssembleAction, BackendAction}) {
             OffloadAction::DeviceDependences DDep;
-            DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]),
-                     Action::OFK_Cuda);
+            DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda);
             DeviceActions.push_back(
                 C.MakeAction<OffloadAction>(DDep, A->getType()));
           }
@@ -2697,6 +2717,47 @@
 
     bool canUseBundlerUnbundler() const override { return true; }
 
+    bool IsValidOffloadArch(StringRef IdStr) const override {
+      // Return a pair of StringRef separated by the first occurrence of '+'
+      // or '-'.
+      auto ParseTargetId = [](StringRef S) {
+        std::pair<StringRef, StringRef> Ret;
+        auto Pos = S.find_first_of("+-");
+        if (Pos == S.npos) {
+          Ret.first = S;
+          return Ret;
+        }
+        Ret.first = S.substr(0, Pos);
+        Ret.second = S.drop_front(Pos + 1);
+        return Ret;
+      };
+      auto Split = ParseTargetId(IdStr);
+      const StringRef ArchStr = Split.first;
+      CudaArch Arch = StringToCudaArch(ArchStr);
+      if (Arch == CudaArch::UNKNOWN) {
+        C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+        return false;
+      }
+      StringRef Features = Split.second;
+      if (Features.empty())
+        return true;
+      auto AllFeatures = HIP::getAllPossibleTargetIdFeatures(ArchStr);
+      unsigned CurIndex = 0;
+      while (!Features.empty()) {
+        auto Splits = ParseTargetId(Features);
+        for (; CurIndex < AllFeatures.size(); ++CurIndex) {
+          if (Splits.first == AllFeatures[CurIndex])
+            break;
+        }
+        if (CurIndex == AllFeatures.size()) {
+          C.getDriver().Diag(clang::diag::err_drv_hip_bad_target_id) << IdStr;
+          return false;
+        }
+        Features = Splits.second;
+      }
+      return true;
+    };
+
     ActionBuilderReturnCode
     getDeviceDependences(OffloadAction::DeviceDependences &DA,
                          phases::ID CurPhase, phases::ID FinalPhase,
@@ -2738,8 +2799,8 @@
           // device arch of the next action being propagated to the above link
           // action.
           OffloadAction::DeviceDependences DDep;
-          DDep.add(*CudaDeviceActions[I], *ToolChains.front(),
-                   CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
+          DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I],
+                   AssociatedOffloadKind);
           CudaDeviceActions[I] = C.MakeAction<OffloadAction>(
               DDep, CudaDeviceActions[I]->getType());
         }
@@ -2795,8 +2856,8 @@
       for (auto &LI : DeviceLinkerInputs) {
         auto *DeviceLinkAction =
             C.MakeAction<LinkJobAction>(LI, types::TY_Image);
-        DA.add(*DeviceLinkAction, *ToolChains[0],
-               CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
+        DA.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I],
+               AssociatedOffloadKind);
         ++I;
       }
     }
Index: clang/lib/Basic/HIP.cpp
===================================================================
--- /dev/null
+++ clang/lib/Basic/HIP.cpp
@@ -0,0 +1,26 @@
+//===--- HIP.cpp - Utilities for compiling HIP code  ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/HIP.h"
+
+namespace clang {
+
+namespace HIP {
+
+const llvm::SmallVector<llvm::StringRef, 4>
+getAllPossibleTargetIdFeatures(llvm::StringRef Device) {
+  llvm::SmallVector<llvm::StringRef, 4> Ret;
+  if (Device == "gfx902" || Device == "gfx908" || Device == "gfx909" ||
+      Device.startswith("gfx10"))
+    Ret.push_back("xnack");
+  if (Device == "gfx906" || Device == "gfx908" || Device == "gfx909")
+    Ret.push_back("sramecc");
+  return Ret;
+}
+} // namespace HIP
+} // namespace clang
Index: clang/lib/Basic/CMakeLists.txt
===================================================================
--- clang/lib/Basic/CMakeLists.txt
+++ clang/lib/Basic/CMakeLists.txt
@@ -48,6 +48,7 @@
   FileManager.cpp
   FileSystemStatCache.cpp
   FixedPoint.cpp
+  HIP.cpp
   IdentifierTable.cpp
   LangOptions.cpp
   LangStandards.cpp
Index: clang/include/clang/Basic/HIP.h
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/HIP.h
@@ -0,0 +1,30 @@
+//===--- HIP.h - Utilities for compiling HIP code  --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_HIP_H
+#define LLVM_CLANG_BASIC_HIP_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+
+namespace clang {
+
+namespace HIP {
+/// Get all feature strings that can be used in target id for \m Device.
+/// Target id is a device name with optional target id feature strings delimited
+/// by a plus or minus sign, e.g. gfx906+xnack-sramecc. Target id feature itself
+/// cannot contain plus or minus sign. Each device have limited number of
+/// predefined target id features which have to follow predefined order when
+/// showing up in a target id.
+const llvm::SmallVector<llvm::StringRef, 4>
+getAllPossibleTargetIdFeatures(llvm::StringRef Device);
+
+} // namespace HIP
+} // namespace clang
+
+#endif
Index: clang/include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -65,6 +65,7 @@
   InGroup<CudaUnknownVersion>;
 def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
 def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
+def err_drv_hip_bad_target_id : Error<"Invalid HIP offloading target id: %0">;
 def err_drv_invalid_thread_model_for_target : Error<
   "invalid thread model '%0' in '%1' for this target">;
 def err_drv_invalid_linker_name : Error<
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to