yaxunl updated this revision to Diff 266410.
yaxunl added a comment.

Changed target id format to be like `gfx908:xnack+:sramecc-`.

I tried to introduce --offload-target-id but found that is not good because: 1. 
it will cause redundant code since I have to handle these options separately in 
CUDA and HIP action builder; 2. it causes unnecessary complexity since I have 
to handle interaction between `--offload-arch` and `--offload-target-id`, 
especially the special case of `all`; 3. `--offload-target-id` is really the 
same thing as `--offload-arch`. Therefore I kept using `--offload-arch`. For 
CUDA this is NFC, since it is not checked as target id.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60620/new/

https://reviews.llvm.org/D60620

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Basic/OffloadArch.h
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/CMakeLists.txt
  clang/lib/Basic/OffloadArch.cpp
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/AMDGPU.h
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/Driver/amdgpu-macros.cl
  clang/test/Driver/invalid-target-id.cl
  clang/test/Driver/invalid-target-id.hip
  clang/test/Driver/target-id-macros.cl
  clang/test/Driver/target-id-macros.hip
  clang/test/Driver/target-id.cl
  clang/test/Driver/target-id.hip

Index: clang/test/Driver/target-id.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/target-id.hip
@@ -0,0 +1,51 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx908:xnack+:sramecc+ \
+// RUN:   --offload-arch=gfx908:xnack+:sramecc- \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck %s
+
+// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" "-fcuda-is-device"
+
+// CHECK: [[OPT:".*opt"]] {{.*}} "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: "-mcpu=gfx908"
+// CHECK-SAME: "-o" [[OPT_906_BC:".*-gfx908-optimized.*bc"]]
+
+// CHECK: [[LLC: ".*llc"]] [[OPT_906_BC]]
+// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-mcpu=gfx908"
+
+// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-target-cpu" "gfx908"
+// CHECK-SAME: {{.*}} "-target-feature" "+xnack"
+// CHECK-SAME: {{.*}} "-target-feature" "+sram-ecc"
+
+// CHECK: [[OPT]] {{.*"}} "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: "-mcpu=gfx908"
+// CHECK-SAME: "-o" [[OPT_906XE_BC:".*-gfx908:xnack\+:sramecc\+.*bc"]]
+
+// CHECK: [[LLC]] [[OPT_906XE_BC]]
+// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-mcpu=gfx908"
+
+// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-target-cpu" "gfx908"
+// CHECK-SAME: {{.*}} "-target-feature" "+xnack"
+// CHECK-SAME: {{.*}} "-target-feature" "-sram-ecc"
+
+// CHECK: [[OPT]] {{.*}} "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: "-mcpu=gfx908"
+// CHECK-SAME: "-o" [[OPT_906XN_BC:".*-gfx908:xnack\+:sramecc\-.*bc"]]
+
+// CHECK: [[LLC]] [[OPT_906XN_BC]]
+// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-mcpu=gfx908"
+
+
+// CHECK: {{".*clang-offload-bundler"}}
+// CHECK-SAME: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx908,hip-amdgcn-amd-amdhsa-gfx908:xnack+:sramecc+,hip-amdgcn-amd-amdhsa-gfx908:xnack+:sramecc-"
Index: clang/test/Driver/target-id.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/target-id.cl
@@ -0,0 +1,21 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -mcpu=gfx908:xnack+:sramecc- \
+// RUN:   -nostdlib %s 2>&1 | FileCheck %s
+
+
+// RUN: %clang -### -target amdgcn-amd-amdpal \
+// RUN:   -mcpu=gfx908:xnack+:sramecc- \
+// RUN:   -nostdlib %s 2>&1 | FileCheck %s
+
+
+// RUN: %clang -### -target amdgcn--mesa3d \
+// RUN:   -mcpu=gfx908:xnack+:sramecc- \
+// RUN:   -nostdlib %s 2>&1 | FileCheck %s
+
+// CHECK: "-target-cpu" "gfx908"
+// CHECK-SAME: "-target-feature" "+xnack"
+// CHECK-SAME: "-target-feature" "-sram-ecc"
Index: clang/test/Driver/target-id-macros.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/target-id-macros.hip
@@ -0,0 +1,12 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -E -dM -target x86_64-linux-gnu --cuda-device-only \
+// RUN:   --offload-arch=gfx908:xnack+:sramecc- -nogpulib -o - %s 2>&1 \
+// RUN:   | FileCheck %s
+
+// CHECK-DAG: #define __amdgcn_processor__ "gfx908"
+// CHECK-DAG: #define __amdgcn_xnack__ 1
+// CHECK-DAG: #define __amdgcn_sramecc__ 0
+// CHECK-DAG: #define __amdgcn_target_id__ "gfx908:xnack+:sramecc-"
Index: clang/test/Driver/target-id-macros.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/target-id-macros.cl
@@ -0,0 +1,20 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -E -dM -target amdgcn-amd-amdhsa \
+// RUN:   -mcpu=gfx908:xnack+:sramecc- -nogpulib -o - %s 2>&1 \
+// RUN:   | FileCheck %s
+
+// RUN: %clang -E -dM -target amdgcn-amd-amdpal \
+// RUN:   -mcpu=gfx908:xnack+:sramecc- -nogpulib -o - %s 2>&1 \
+// RUN:   | FileCheck %s
+
+// RUN: %clang -E -dM -target amdgcn--mesa3d \
+// RUN:   -mcpu=gfx908:xnack+:sramecc- -nogpulib -o - %s 2>&1 \
+// RUN:   | FileCheck %s
+
+// CHECK-DAG: #define __amdgcn_processor__ "gfx908"
+// CHECK-DAG: #define __amdgcn_xnack__ 1
+// CHECK-DAG: #define __amdgcn_sramecc__ 0
+// CHECK-DAG: #define __amdgcn_target_id__ "gfx908:xnack+:sramecc-"
Index: clang/test/Driver/invalid-target-id.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/invalid-target-id.hip
@@ -0,0 +1,62 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx908xnack \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=NOPLUS %s
+
+// NOPLUS: error: Unsupported CUDA gpu architecture
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx908:sramecc+:xnack+ \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=ORDER %s
+
+// ORDER: error: Invalid target id: gfx908:sramecc+:xnack+
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx908:unknown+ \
+// RUN:   --offload-arch=gfx908+sramecc+unknown \
+// RUN:   --offload-arch=gfx900+xnack \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=UNK %s
+
+// UNK: error: Invalid target id: gfx908:unknown+
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx908:sramecc+:unknown+ \
+// RUN:   --offload-arch=gfx900+xnack \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=MIXED %s
+
+// MIXED: error: Invalid target id: gfx908:sramecc+:unknown+
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx900:xnack+ \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=UNSUP %s
+
+// UNSUP: error: Invalid target id: gfx900:xnack+
+
+/ RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx900:xnack \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=NOSIGN %s
+
+// NOSIGN: error: Invalid target id: gfx900:xnack
+
+/ RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx908 \
+// RUN:   --offload-arch=gfx900+xnack \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=NOCOLON %s
+
+// NOCOLON: error: Unsupported CUDA gpu architecture
Index: clang/test/Driver/invalid-target-id.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/invalid-target-id.cl
@@ -0,0 +1,45 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: not %clang -target amdgcn-amd-amdhsa \
+// RUN:   -mcpu=gfx908xnack -nostdlib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=NOPLUS %s
+
+// NOPLUS: error: unknown target CPU 'gfx908xnack'
+
+// RUN: not %clang -target amdgcn-amd-amdpal \
+// RUN:   -mcpu=gfx908:sramecc+:xnack+ -nostdlib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=ORDER %s
+
+// ORDER: error: Invalid target id: gfx908:sramecc+:xnack+
+
+// RUN: not %clang -target amdgcn--mesa3d \
+// RUN:   -mcpu=gfx908:unknown+ -nostdlib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=UNK %s
+
+// UNK: error: Invalid target id: gfx908:unknown+
+
+// RUN: not %clang -target amdgcn-amd-amdhsa \
+// RUN:   -mcpu=gfx908:sramecc+:unknown+ -nostdlib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=MIXED %s
+
+// MIXED: error: Invalid target id: gfx908:sramecc+:unknown+
+
+// RUN: not %clang -target amdgcn-amd-amdhsa \
+// RUN:   -mcpu=gfx900:xnack+ -nostdlib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=UNSUP %s
+
+// UNSUP: error: Invalid target id: gfx900:xnack+
+
+// RUN: not %clang -target amdgcn-amd-amdhsa \
+// RUN:   -mcpu=gfx900:xnack -nostdlib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=NOSIGN %s
+
+// NOSIGN: error: Invalid target id: gfx900:xnack
+
+// RUN: not %clang -target amdgcn-amd-amdhsa \
+// RUN:   -mcpu=gfx900+xnack -nostdlib \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=NOCOLON %s
+
+// NOCOLON: error: unknown target CPU 'gfx900+xnack'
Index: clang/test/Driver/amdgpu-macros.cl
===================================================================
--- clang/test/Driver/amdgpu-macros.cl
+++ clang/test/Driver/amdgpu-macros.cl
@@ -310,3 +310,24 @@
 // GFX1010-DAG: #define __gfx1010__ 1
 // GFX1011-DAG: #define __gfx1011__ 1
 // GFX1012-DAG: #define __gfx1012__ 1
+
+// GFX600-DAG: #define __amdgcn_processor__ "gfx600"
+// GFX601-DAG: #define __amdgcn_processor__ "gfx601"
+// GFX700-DAG: #define __amdgcn_processor__ "gfx700"
+// GFX701-DAG: #define __amdgcn_processor__ "gfx701"
+// GFX702-DAG: #define __amdgcn_processor__ "gfx702"
+// GFX703-DAG: #define __amdgcn_processor__ "gfx703"
+// GFX704-DAG: #define __amdgcn_processor__ "gfx704"
+// GFX801-DAG: #define __amdgcn_processor__ "gfx801"
+// GFX802-DAG: #define __amdgcn_processor__ "gfx802"
+// GFX803-DAG: #define __amdgcn_processor__ "gfx803"
+// GFX810-DAG: #define __amdgcn_processor__ "gfx810"
+// GFX900-DAG: #define __amdgcn_processor__ "gfx900"
+// GFX902-DAG: #define __amdgcn_processor__ "gfx902"
+// GFX904-DAG: #define __amdgcn_processor__ "gfx904"
+// GFX906-DAG: #define __amdgcn_processor__ "gfx906"
+// GFX908-DAG: #define __amdgcn_processor__ "gfx908"
+// GFX909-DAG: #define __amdgcn_processor__ "gfx909"
+// GFX1010-DAG: #define __amdgcn_processor__ "gfx1010"
+// GFX1011-DAG: #define __amdgcn_processor__ "gfx1011"
+// GFX1012-DAG: #define __amdgcn_processor__ "gfx1012"
Index: clang/lib/Driver/ToolChains/HIP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -11,6 +11,7 @@
 #include "CommonArgs.h"
 #include "InputInfo.h"
 #include "clang/Basic/Cuda.h"
+#include "clang/Basic/OffloadArch.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
@@ -117,6 +118,7 @@
     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
     const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
     llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
+  SubArchName = parseOffloadArch(SubArchName);
   // Construct opt command.
   ArgStringList OptArgs;
   // The input to opt is the output from llvm-link.
@@ -145,6 +147,7 @@
     const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
     llvm::StringRef OutputFilePrefix, const char *InputFileName,
     bool OutputIsAsm) const {
+  SubArchName = parseOffloadArch(SubArchName);
   // Construct llc command.
   ArgStringList LlcArgs;
   // The input to llc is the output from opt.
@@ -281,7 +284,8 @@
     Action::OffloadKind DeviceOffloadingKind) const {
   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
 
-  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
+  // Allow using target id in --offload-arch.
+  StringRef GpuArch = translateTargetId(DriverArgs, CC1Args);
   assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
   (void) GpuArch;
   assert(DeviceOffloadingKind == Action::OFK_HIP &&
@@ -289,8 +293,6 @@
   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
   const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
 
-  CC1Args.push_back("-target-cpu");
-  CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
   CC1Args.push_back("-fcuda-is-device");
 
   if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
@@ -391,6 +393,7 @@
   }
 
   if (!BoundArch.empty()) {
+    DAL->eraseArg(options::OPT_mcpu_EQ);
     DAL->eraseArg(options::OPT_march_EQ);
     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
   }
Index: clang/lib/Driver/ToolChains/AMDGPU.h
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.h
+++ clang/lib/Driver/ToolChains/AMDGPU.h
@@ -221,6 +221,14 @@
 
   static bool isWave64(const llvm::opt::ArgList &DriverArgs,
                        llvm::AMDGPU::GPUKind Kind);
+
+protected:
+  /// Translate -mcpu option containing target id to cc1 options.
+  /// Returns the GPU name.
+  StringRef translateTargetId(const llvm::opt::ArgList &DriverArgs,
+                              llvm::opt::ArgStringList &CC1Args) const;
+
+  StringRef getGPUArch(const llvm::opt::ArgList &DriverArgs) const;
 };
 
 class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -9,6 +9,7 @@
 #include "AMDGPU.h"
 #include "CommonArgs.h"
 #include "InputInfo.h"
+#include "clang/Basic/OffloadArch.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "llvm/Option/ArgList.h"
@@ -254,16 +255,22 @@
   DerivedArgList *DAL =
       Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
 
-  // Do nothing if not OpenCL (-x cl)
-  if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
-    return DAL;
+  const OptTable &Opts = getDriver().getOpts();
 
   if (!DAL)
     DAL = new DerivedArgList(Args.getBaseArgs());
-  for (auto *A : Args)
-    DAL->append(A);
+  for (auto *A : Args) {
+    // Clang always pass -mcpu to clang -cc1 by -target-cpu. For translating
+    // target id, we have to pass it by -march.
+    if (A->getOption().matches(options::OPT_mcpu_EQ))
+      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+                        Args.getLastArgValue(options::OPT_mcpu_EQ));
+    else
+      DAL->append(A);
+  }
 
-  const OptTable &Opts = getDriver().getOpts();
+  if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
+    return DAL;
 
   // Phase 1 (.cl -> .bc)
   if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
@@ -308,7 +315,8 @@
 
   if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
       JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
-    auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch());
+    auto Kind =
+        llvm::AMDGPU::parseArchAMDGCN(parseOffloadArch(JA.getOffloadingArch()));
     if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
         DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
                            options::OPT_fno_cuda_flush_denormals_to_zero,
@@ -318,7 +326,7 @@
     return llvm::DenormalMode::getIEEE();
   }
 
-  const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
+  const StringRef GpuArch = getGPUArch(DriverArgs);
   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
 
   // TODO: There are way too many flags that change this. Do we need to check
@@ -352,6 +360,8 @@
     const llvm::opt::ArgList &DriverArgs,
     llvm::opt::ArgStringList &CC1Args,
     Action::OffloadKind DeviceOffloadingKind) const {
+  // Allow using target id in -mcpu.
+  translateTargetId(DriverArgs, CC1Args);
   // Default to "hidden" visibility, as object level linking will not be
   // supported for the foreseeable future.
   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
@@ -362,6 +372,56 @@
   }
 }
 
+StringRef
+AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
+  assert(!DriverArgs.hasArg(options::OPT_mcpu_EQ) &&
+         "-mcpu should have been translated to -march");
+  return parseOffloadArch(DriverArgs.getLastArgValue(options::OPT_march_EQ));
+}
+
+StringRef
+AMDGPUToolChain::translateTargetId(const llvm::opt::ArgList &DriverArgs,
+                                   llvm::opt::ArgStringList &CC1Args) const {
+  StringRef GpuArch;
+  llvm::StringMap<bool> FeatureMap;
+  assert(!DriverArgs.hasArg(options::OPT_mcpu_EQ) &&
+         "-mcpu should have been translated to -march");
+  StringRef TargetId = DriverArgs.getLastArgValue(options::OPT_march_EQ);
+  if (TargetId.empty())
+    return GpuArch;
+
+  bool IsValid;
+  GpuArch = parseOffloadArch(TargetId, &FeatureMap, &IsValid);
+  if (!IsValid) {
+    getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetId;
+    return GpuArch;
+  }
+
+  if (GpuArch.empty())
+    return GpuArch;
+
+  CC1Args.push_back("-target-cpu");
+  CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
+
+  // Iterate through all possible target id features for the given GPU.
+  // If it is mapped to true, pass -mfeature to clang -cc1.
+  // If it is mapped to false, pass -mno-feature to clang -cc1.
+  // If it is not in the map (default), do not pass it to clang -cc1.
+  for (auto Feature : getAllPossibleOffloadArchFeatures(GpuArch)) {
+    auto Pos = FeatureMap.find(Feature);
+    if (Pos == FeatureMap.end())
+      continue;
+    CC1Args.push_back("-target-feature");
+    auto FeatureName = Feature;
+    if (Feature == "sramecc")
+      FeatureName = "sram-ecc";
+    std::string Opt = (Twine(Pos->second ? "+" : "-") + FeatureName).str();
+    CC1Args.push_back(DriverArgs.MakeArgStringRef(Opt));
+  }
+
+  return GpuArch;
+}
+
 void ROCMToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
     Action::OffloadKind DeviceOffloadingKind) const {
@@ -383,7 +443,7 @@
   }
 
   // Get the device name and canonicalize it
-  const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
+  const StringRef GpuArch = getGPUArch(DriverArgs);
   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
   const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
   std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -45,6 +45,7 @@
 #include "ToolChains/TCE.h"
 #include "ToolChains/WebAssembly.h"
 #include "ToolChains/XCore.h"
+#include "clang/Basic/OffloadArch.h"
 #include "clang/Basic/Version.h"
 #include "clang/Config/config.h"
 #include "clang/Driver/Action.h"
@@ -2355,8 +2356,20 @@
     bool EmitLLVM = false;
     bool EmitAsm = false;
 
+    /// Id to identify each device compilation. For CUDA it is simply the
+    /// GPU arch string. For HIP it is either the GPU arch string or GPU
+    /// arch string plus feature strings delimited by a plus sign, e.g.
+    /// gfx906+xnack.
+    struct TargetId {
+      /// Target id string which is persistent throughout the compilation.
+      const char *Id;
+      TargetId(CudaArch Arch) { Id = CudaArchToString(Arch); }
+      TargetId(const char *Id) : Id(Id) {}
+      operator const char *() { return Id; }
+      operator StringRef() { return StringRef(Id); }
+    };
     /// List of GPU architectures to use in this compilation.
-    SmallVector<CudaArch, 4> GpuArchList;
+    SmallVector<TargetId, 4> GpuArchList;
 
     /// The CUDA actions for the current input.
     ActionList CudaDeviceActions;
@@ -2439,7 +2452,7 @@
 
         for (auto Arch : GpuArchList) {
           CudaDeviceActions.push_back(UA);
-          UA->registerDependentActionInfo(ToolChains[0], CudaArchToString(Arch),
+          UA->registerDependentActionInfo(ToolChains[0], Arch,
                                           AssociatedOffloadKind);
         }
         return ABRT_Success;
@@ -2450,10 +2463,9 @@
 
     void appendTopLevelActions(ActionList &AL) override {
       // Utility to append actions to the top level list.
-      auto AddTopLevel = [&](Action *A, CudaArch BoundArch) {
+      auto AddTopLevel = [&](Action *A, TargetId TargetId) {
         OffloadAction::DeviceDependences Dep;
-        Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch),
-                AssociatedOffloadKind);
+        Dep.add(*A, *ToolChains.front(), TargetId, AssociatedOffloadKind);
         AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
       };
 
@@ -2481,6 +2493,8 @@
       CudaDeviceActions.clear();
     }
 
+    virtual bool IsValidOffloadArch(StringRef Arch) const = 0;
+
     bool initialize() override {
       assert(AssociatedOffloadKind == Action::OFK_Cuda ||
              AssociatedOffloadKind == Action::OFK_HIP);
@@ -2528,7 +2542,7 @@
       EmitAsm = Args.getLastArg(options::OPT_S);
 
       // Collect all cuda_gpu_arch parameters, removing duplicates.
-      std::set<CudaArch> GpuArchs;
+      std::set<StringRef> GpuArchs;
       bool Error = false;
       for (Arg *A : Args) {
         if (!(A->getOption().matches(options::OPT_offload_arch_EQ) ||
@@ -2542,21 +2556,19 @@
           GpuArchs.clear();
           continue;
         }
-        CudaArch Arch = StringToCudaArch(ArchStr);
-        if (Arch == CudaArch::UNKNOWN) {
-          C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+        if (!IsValidOffloadArch(ArchStr)) {
           Error = true;
         } else if (A->getOption().matches(options::OPT_offload_arch_EQ))
-          GpuArchs.insert(Arch);
+          GpuArchs.insert(ArchStr);
         else if (A->getOption().matches(options::OPT_no_offload_arch_EQ))
-          GpuArchs.erase(Arch);
+          GpuArchs.erase(ArchStr);
         else
           llvm_unreachable("Unexpected option.");
       }
 
       // Collect list of GPUs remaining in the set.
-      for (CudaArch Arch : GpuArchs)
-        GpuArchList.push_back(Arch);
+      for (auto Arch : GpuArchs)
+        GpuArchList.push_back(Arch.data());
 
       // Default to sm_20 which is the lowest common denominator for
       // supported GPUs.  sm_20 code should work correctly, if
@@ -2578,6 +2590,15 @@
       DefaultCudaArch = CudaArch::SM_20;
     }
 
+    bool IsValidOffloadArch(StringRef ArchStr) const override {
+      CudaArch Arch = StringToCudaArch(ArchStr);
+      if (Arch == CudaArch::UNKNOWN) {
+        C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+        return false;
+      }
+      return true;
+    }
+
     ActionBuilderReturnCode
     getDeviceDependences(OffloadAction::DeviceDependences &DA,
                          phases::ID CurPhase, phases::ID FinalPhase,
@@ -2637,8 +2658,7 @@
 
           for (auto &A : {AssembleAction, BackendAction}) {
             OffloadAction::DeviceDependences DDep;
-            DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]),
-                     Action::OFK_Cuda);
+            DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda);
             DeviceActions.push_back(
                 C.MakeAction<OffloadAction>(DDep, A->getType()));
           }
@@ -2697,6 +2717,22 @@
 
     bool canUseBundlerUnbundler() const override { return true; }
 
+    bool IsValidOffloadArch(StringRef IdStr) const override {
+      bool IsValid;
+      const StringRef ArchStr =
+          parseOffloadArch(IdStr, /*FeatureMap=*/nullptr, &IsValid);
+      CudaArch Arch = StringToCudaArch(ArchStr);
+      if (Arch == CudaArch::UNKNOWN) {
+        C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+        return false;
+      }
+      if (!IsValid) {
+        C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr;
+        return false;
+      }
+      return true;
+    };
+
     ActionBuilderReturnCode
     getDeviceDependences(OffloadAction::DeviceDependences &DA,
                          phases::ID CurPhase, phases::ID FinalPhase,
@@ -2738,8 +2774,8 @@
           // device arch of the next action being propagated to the above link
           // action.
           OffloadAction::DeviceDependences DDep;
-          DDep.add(*CudaDeviceActions[I], *ToolChains.front(),
-                   CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
+          DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I],
+                   AssociatedOffloadKind);
           CudaDeviceActions[I] = C.MakeAction<OffloadAction>(
               DDep, CudaDeviceActions[I]->getType());
         }
@@ -2795,8 +2831,8 @@
       for (auto &LI : DeviceLinkerInputs) {
         auto *DeviceLinkAction =
             C.MakeAction<LinkJobAction>(LI, types::TY_Image);
-        DA.add(*DeviceLinkAction, *ToolChains[0],
-               CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
+        DA.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I],
+               AssociatedOffloadKind);
         ++I;
       }
     }
Index: clang/lib/Basic/Targets/AMDGPU.h
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.h
+++ clang/lib/Basic/Targets/AMDGPU.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
 #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
 
+#include "clang/Basic/OffloadArch.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
 #include "llvm/ADT/StringSet.h"
@@ -41,6 +42,14 @@
   llvm::AMDGPU::GPUKind GPUKind;
   unsigned GPUFeatures;
 
+  /// Features set by extended --offload-arch, -mcpu, or -target-cpu options
+  /// by the format of GPU name followed by optional feature name delimitted
+  /// by plus or minus sign, e.g. gfx908+xnack-sramecc.
+  /// If the offload arch contains +feature, map it to true.
+  /// If the offload arch contains -feature, map it to false.
+  /// If the offload arch does not contain a feature (default), do not map it.
+  llvm::StringMap<bool> OffloadArchFeatures;
+
   bool hasFP64() const {
     return getTriple().getArch() == llvm::Triple::amdgcn ||
            !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64);
@@ -361,6 +370,25 @@
   void setAuxTarget(const TargetInfo *Aux) override;
 
   bool hasExtIntType() const override { return true; }
+
+  // Record offload arch features since they are needed for defining the
+  // pre-defined macros.
+  bool handleTargetFeatures(std::vector<std::string> &Features,
+                            DiagnosticsEngine &Diags) override {
+    for (auto &F : Features) {
+      llvm::errs() << "Feature: " << F << '\n';
+      assert(F.front() == '+' || F.front() == '-');
+      bool IsOn = F.front() == '+';
+      StringRef Name = StringRef(F).drop_front();
+      if (Name == "sram-ecc")
+        Name = "sramecc";
+      if (Name != "xnack" && Name != "sramecc")
+        continue;
+      assert(OffloadArchFeatures.find(Name) == OffloadArchFeatures.end());
+      OffloadArchFeatures[Name] = IsOn;
+    }
+    return true;
+  }
 };
 
 } // namespace targets
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -334,6 +334,22 @@
     StringRef CanonName = isAMDGCN(getTriple()) ?
       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
+    if (isAMDGCN(getTriple())) {
+      Builder.defineMacro("__amdgcn_processor__",
+                          Twine("\"") + Twine(CanonName) + Twine("\""));
+
+      std::string TargetId = CanonName.str();
+      for (auto F : getAllPossibleOffloadArchFeatures(CanonName)) {
+        auto Loc = OffloadArchFeatures.find(F);
+        if (Loc != OffloadArchFeatures.end()) {
+          Builder.defineMacro(Twine("__amdgcn_") + Twine(F) + Twine("__"),
+                              Loc->second ? "1" : "0");
+          TargetId = TargetId + ':' + F.str() + (Loc->second ? "+" : "-");
+        }
+      }
+      Builder.defineMacro("__amdgcn_target_id__",
+                          Twine("\"") + Twine(TargetId) + Twine("\""));
+    }
   }
 
   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
Index: clang/lib/Basic/OffloadArch.cpp
===================================================================
--- /dev/null
+++ clang/lib/Basic/OffloadArch.cpp
@@ -0,0 +1,66 @@
+//===--- OffloadArch.cpp - Utilities for parsing offload arch -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/OffloadArch.h"
+#include "llvm/Support/raw_ostream.h"
+namespace clang {
+
+const llvm::SmallVector<llvm::StringRef, 4>
+getAllPossibleOffloadArchFeatures(llvm::StringRef Device) {
+  llvm::SmallVector<llvm::StringRef, 4> Ret;
+  if (Device == "gfx902" || Device == "gfx908" || Device == "gfx909" ||
+      Device.startswith("gfx10"))
+    Ret.push_back("xnack");
+  if (Device == "gfx906" || Device == "gfx908" || Device == "gfx909")
+    Ret.push_back("sramecc");
+  return Ret;
+}
+
+llvm::StringRef parseOffloadArch(llvm::StringRef OffloadArch,
+                                 llvm::StringMap<bool> *FeatureMap,
+                                 bool *IsValid) {
+  llvm::StringRef ArchStr;
+  auto SetValid = [&](bool Valid) {
+    if (IsValid)
+      *IsValid = Valid;
+    return ArchStr;
+  };
+
+  auto Split = OffloadArch.split(':');
+  ArchStr = Split.first;
+  if (!FeatureMap && !IsValid)
+    return ArchStr;
+
+  auto Features = Split.second;
+  if (Features.empty())
+    return SetValid(true);
+
+  auto AllFeatures = getAllPossibleOffloadArchFeatures(ArchStr);
+  unsigned CurIndex = 0;
+  while (!Features.empty()) {
+    auto Splits = Features.split(':');
+    auto Sign = Splits.first.back();
+    auto Feature = Splits.first.drop_back();
+    llvm::errs() << Feature << " " << Sign << " " << Splits.second << '\n';
+    if (Sign != '+' && Sign != '-')
+      return SetValid(false);
+    bool IsOn = Sign == '+';
+    for (; CurIndex < AllFeatures.size(); ++CurIndex) {
+      if (Feature == AllFeatures[CurIndex]) {
+        if (FeatureMap)
+          (*FeatureMap)[Feature] = IsOn;
+        break;
+      }
+    }
+    if (CurIndex == AllFeatures.size())
+      return SetValid(false);
+    Features = Splits.second;
+  }
+  return SetValid(true);
+};
+} // namespace clang
Index: clang/lib/Basic/CMakeLists.txt
===================================================================
--- clang/lib/Basic/CMakeLists.txt
+++ clang/lib/Basic/CMakeLists.txt
@@ -53,6 +53,7 @@
   LangStandards.cpp
   Module.cpp
   ObjCRuntime.cpp
+  OffloadArch.cpp
   OpenMPKinds.cpp
   OperatorPrecedence.cpp
   SanitizerBlacklist.cpp
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -572,7 +572,10 @@
 def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[DriverOption]>,
   HelpText<"Do not include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
 def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[DriverOption]>,
-  HelpText<"CUDA/HIP offloading device architecture (e.g. sm_35, gfx906).  May be specified more than once.">;
+  HelpText<"CUDA offloading device architecture (e.g. sm_35), or HIP offloading target id in the form of a "
+           "device architecture followed by target id features delimited by a colon. Each target id feature "
+           "is a pre-defined string followed by a plus or minus sign (e.g. gfx908:xnack+:sramecc-).  May be "
+           "specified more than once.">;
 def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>,
   Alias<offload_arch_EQ>;
 def hip_link : Flag<["--"], "hip-link">,
Index: clang/include/clang/Basic/OffloadArch.h
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/OffloadArch.h
@@ -0,0 +1,41 @@
+//===--- OffloadArch.h - Utilities for offload arch -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_OFFLOAD_ARCH_H
+#define LLVM_CLANG_BASIC_OFFLOAD_ARCH_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+
+namespace clang {
+
+/// Get all feature strings that can be used in offload arch for \m Device.
+/// Offload arch is a device name with optional offload arch feature strings
+/// postfixed by a plus or minus sign delimited by colons, e.g.
+/// gfx908:xnack+:sramecc-. Each device have limited
+/// number of predefined offload arch features which have to follow predefined
+/// order when showing up in a offload arch.
+const llvm::SmallVector<llvm::StringRef, 4>
+getAllPossibleOffloadArchFeatures(llvm::StringRef Device);
+
+/// Parse an offload arch to get GPU arch and feature map.
+/// Returns GPU arch.
+/// Returns offload arch features in \p FeatureMap if it is not null pointer.
+/// This function assumes \p OffloadArch is a valid offload arch.
+/// If the offload arch contains feature+, map it to true.
+/// If the offload arch contains feature-, map it to false.
+/// If the offload arch does not contain a feature (default), do not map it.
+/// Returns whether the offload arch features are valid in \p IsValid if it
+/// is not a null pointer.
+llvm::StringRef parseOffloadArch(llvm::StringRef OffloadArch,
+                                 llvm::StringMap<bool> *FeatureMap = nullptr,
+                                 bool *IsValid = nullptr);
+
+} // namespace clang
+
+#endif
Index: clang/include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -73,6 +73,7 @@
   InGroup<CudaUnknownVersion>;
 def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
 def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
+def err_drv_bad_target_id : Error<"Invalid target id: %0">;
 def err_drv_invalid_thread_model_for_target : Error<
   "invalid thread model '%0' in '%1' for this target">;
 def err_drv_invalid_linker_name : Error<
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to