yaxunl updated this revision to Diff 266315.
yaxunl marked an inline comment as done.
yaxunl added a comment.
Herald added subscribers: kerbowa, nhaehnle, jvesely.
Fixed passing target id to clang -cc1. Added predefined macros for target id.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D60620/new/
https://reviews.llvm.org/D60620
Files:
clang/include/clang/Basic/DiagnosticDriverKinds.td
clang/include/clang/Basic/OffloadArch.h
clang/lib/Basic/CMakeLists.txt
clang/lib/Basic/OffloadArch.cpp
clang/lib/Basic/Targets/AMDGPU.cpp
clang/lib/Basic/Targets/AMDGPU.h
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/lib/Driver/ToolChains/AMDGPU.h
clang/lib/Driver/ToolChains/HIP.cpp
clang/test/Driver/amdgpu-macros.cl
clang/test/Driver/invalid-target-id.cl
clang/test/Driver/invalid-target-id.hip
clang/test/Driver/target-id-macros.cl
clang/test/Driver/target-id-macros.hip
clang/test/Driver/target-id.cl
clang/test/Driver/target-id.hip
Index: clang/test/Driver/target-id.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/target-id.hip
@@ -0,0 +1,54 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: -x hip --offload-arch=gfx908 \
+// RUN: --offload-arch=gfx908+xnack+sramecc \
+// RUN: --offload-arch=gfx908+xnack-sramecc \
+// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN: %s 2>&1 | FileCheck %s
+
+// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" "-fcuda-is-device"
+
+// CHECK: [[OPT:".*opt"]] {{".*-gfx908-linked.*bc"}} "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: "-mcpu=gfx908"
+// CHECK-SAME: "-o" [[OPT_906_BC:".*-gfx908-optimized.*bc"]]
+
+// CHECK: [[LLC: ".*llc"]] [[OPT_906_BC]]
+// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-mcpu=gfx908"
+// CHECK-SAME: "-o" {{".*-gfx908-.*o"}}
+
+// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-target-cpu" "gfx908"
+// CHECK-SAME: {{.*}} "-target-feature" "+xnack"
+// CHECK-SAME: {{.*}} "-target-feature" "+sram-ecc"
+
+// CHECK: [[OPT]] {{".*-gfx908\+xnack\+sramecc.*bc"}} "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: "-mcpu=gfx908"
+// CHECK-SAME: "-o" [[OPT_906XE_BC:".*-gfx908\+xnack\+sramecc.*bc"]]
+
+// CHECK: [[LLC]] [[OPT_906XE_BC]]
+// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-mcpu=gfx908"
+// CHECK-SAME: "-o" {{".*-gfx908\+xnack\+sramecc.*o"}}
+
+// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-target-cpu" "gfx908"
+// CHECK-SAME: {{.*}} "-target-feature" "+xnack"
+// CHECK-SAME: {{.*}} "-target-feature" "-sram-ecc"
+
+// CHECK: [[OPT]] {{".*-gfx908\+xnack-sramecc.*bc"}} "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: "-mcpu=gfx908"
+// CHECK-SAME: "-o" [[OPT_906XN_BC:".*-gfx908\+xnack-sramecc.*bc"]]
+
+// CHECK: [[LLC]] [[OPT_906XN_BC]]
+// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
+// CHECK-SAME: {{.*}} "-mcpu=gfx908"
+// CHECK-SAME: "-o" {{".*-gfx908\+xnack-sramecc.*o"}}
+
+
+// CHECK: {{".*clang-offload-bundler"}}
+// CHECK-SAME: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx908,hip-amdgcn-amd-amdhsa-gfx908+xnack+sramecc,hip-amdgcn-amd-amdhsa-gfx908+xnack-sramecc"
Index: clang/test/Driver/target-id.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/target-id.cl
@@ -0,0 +1,21 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN: -mcpu=gfx908+xnack-sramecc \
+// RUN: -nostdlib %s 2>&1 | FileCheck %s
+
+
+// RUN: %clang -### -target amdgcn-amd-amdpal \
+// RUN: -mcpu=gfx908+xnack-sramecc \
+// RUN: -nostdlib %s 2>&1 | FileCheck %s
+
+
+// RUN: %clang -### -target amdgcn--mesa3d \
+// RUN: -mcpu=gfx908+xnack-sramecc \
+// RUN: -nostdlib %s 2>&1 | FileCheck %s
+
+// CHECK: "-target-cpu" "gfx908"
+// CHECK-SAME: "-target-feature" "+xnack"
+// CHECK-SAME: "-target-feature" "-sram-ecc"
Index: clang/test/Driver/target-id-macros.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/target-id-macros.hip
@@ -0,0 +1,12 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -E -dM -target x86_64-linux-gnu --cuda-device-only \
+// RUN: --offload-arch=gfx908+xnack-sramecc -nogpulib -o - %s 2>&1 \
+// RUN: | FileCheck %s
+
+// CHECK-DAG: #define __amdgcn_processor__ "gfx908"
+// CHECK-DAG: #define __amdgcn_xnack__ 1
+// CHECK-DAG: #define __amdgcn_sramecc__ 0
+// CHECK-DAG: #define __amdgcn_target_id__ "gfx908+xnack-sramecc"
Index: clang/test/Driver/target-id-macros.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/target-id-macros.cl
@@ -0,0 +1,20 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -E -dM -target amdgcn-amd-amdhsa \
+// RUN: -mcpu=gfx908+xnack-sramecc -nogpulib -o - %s 2>&1 \
+// RUN: | FileCheck %s
+
+// RUN: %clang -E -dM -target amdgcn-amd-amdpal \
+// RUN: -mcpu=gfx908+xnack-sramecc -nogpulib -o - %s 2>&1 \
+// RUN: | FileCheck %s
+
+// RUN: %clang -E -dM -target amdgcn--mesa3d \
+// RUN: -mcpu=gfx908+xnack-sramecc -nogpulib -o - %s 2>&1 \
+// RUN: | FileCheck %s
+
+// CHECK-DAG: #define __amdgcn_processor__ "gfx908"
+// CHECK-DAG: #define __amdgcn_xnack__ 1
+// CHECK-DAG: #define __amdgcn_sramecc__ 0
+// CHECK-DAG: #define __amdgcn_target_id__ "gfx908+xnack-sramecc"
Index: clang/test/Driver/invalid-target-id.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/invalid-target-id.hip
@@ -0,0 +1,46 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: -x hip --offload-arch=gfx908 \
+// RUN: --offload-arch=gfx908xnack \
+// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s
+
+// NOPLUS: error: Unsupported CUDA gpu architecture
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: -x hip --offload-arch=gfx908 \
+// RUN: --offload-arch=gfx908+sramecc+xnack \
+// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s
+
+// ORDER: error: Invalid target id: gfx908+sramecc+xnack
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: -x hip --offload-arch=gfx908 \
+// RUN: --offload-arch=gfx908+unknown \
+// RUN: --offload-arch=gfx908+sramecc+unknown \
+// RUN: --offload-arch=gfx900+xnack \
+// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s
+
+// UNK: error: Invalid target id: gfx908+unknown
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: -x hip --offload-arch=gfx908 \
+// RUN: --offload-arch=gfx908+sramecc+unknown \
+// RUN: --offload-arch=gfx900+xnack \
+// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s
+
+// MIXED: error: Invalid target id: gfx908+sramecc+unknown
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: -x hip --offload-arch=gfx908 \
+// RUN: --offload-arch=gfx900+xnack \
+// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \
+// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s
+
+// UNSUP: error: Invalid target id: gfx900+xnack
Index: clang/test/Driver/invalid-target-id.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/invalid-target-id.cl
@@ -0,0 +1,33 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: not %clang -target amdgcn-amd-amdhsa \
+// RUN: -mcpu=gfx908xnack -nostdlib \
+// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s
+
+// NOPLUS: error: unknown target CPU 'gfx908xnack'
+
+// RUN: not %clang -target amdgcn-amd-amdpal \
+// RUN: -mcpu=gfx908+sramecc+xnack -nostdlib \
+// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s
+
+// ORDER: error: Invalid target id: gfx908+sramecc+xnack
+
+// RUN: not %clang -target amdgcn--mesa3d \
+// RUN: -mcpu=gfx908+unknown -nostdlib \
+// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s
+
+// UNK: error: Invalid target id: gfx908+unknown
+
+// RUN: not %clang -target amdgcn-amd-amdhsa \
+// RUN: -mcpu=gfx908+sramecc+unknown -nostdlib \
+// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s
+
+// MIXED: error: Invalid target id: gfx908+sramecc+unknown
+
+// RUN: not %clang -target amdgcn-amd-amdhsa \
+// RUN: -mcpu=gfx900+xnack -nostdlib \
+// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s
+
+// UNSUP: error: Invalid target id: gfx900+xnack
Index: clang/test/Driver/amdgpu-macros.cl
===================================================================
--- clang/test/Driver/amdgpu-macros.cl
+++ clang/test/Driver/amdgpu-macros.cl
@@ -310,3 +310,24 @@
// GFX1010-DAG: #define __gfx1010__ 1
// GFX1011-DAG: #define __gfx1011__ 1
// GFX1012-DAG: #define __gfx1012__ 1
+
+// GFX600-DAG: #define __amdgcn_processor__ "gfx600"
+// GFX601-DAG: #define __amdgcn_processor__ "gfx601"
+// GFX700-DAG: #define __amdgcn_processor__ "gfx700"
+// GFX701-DAG: #define __amdgcn_processor__ "gfx701"
+// GFX702-DAG: #define __amdgcn_processor__ "gfx702"
+// GFX703-DAG: #define __amdgcn_processor__ "gfx703"
+// GFX704-DAG: #define __amdgcn_processor__ "gfx704"
+// GFX801-DAG: #define __amdgcn_processor__ "gfx801"
+// GFX802-DAG: #define __amdgcn_processor__ "gfx802"
+// GFX803-DAG: #define __amdgcn_processor__ "gfx803"
+// GFX810-DAG: #define __amdgcn_processor__ "gfx810"
+// GFX900-DAG: #define __amdgcn_processor__ "gfx900"
+// GFX902-DAG: #define __amdgcn_processor__ "gfx902"
+// GFX904-DAG: #define __amdgcn_processor__ "gfx904"
+// GFX906-DAG: #define __amdgcn_processor__ "gfx906"
+// GFX908-DAG: #define __amdgcn_processor__ "gfx908"
+// GFX909-DAG: #define __amdgcn_processor__ "gfx909"
+// GFX1010-DAG: #define __amdgcn_processor__ "gfx1010"
+// GFX1011-DAG: #define __amdgcn_processor__ "gfx1011"
+// GFX1012-DAG: #define __amdgcn_processor__ "gfx1012"
Index: clang/lib/Driver/ToolChains/HIP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -11,6 +11,7 @@
#include "CommonArgs.h"
#include "InputInfo.h"
#include "clang/Basic/Cuda.h"
+#include "clang/Basic/OffloadArch.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
@@ -117,6 +118,9 @@
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
+ auto Pos = SubArchName.find_first_of("+-");
+ if (Pos != SubArchName.npos)
+ SubArchName = SubArchName.substr(0, Pos);
// Construct opt command.
ArgStringList OptArgs;
// The input to opt is the output from llvm-link.
@@ -145,6 +149,9 @@
const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
llvm::StringRef OutputFilePrefix, const char *InputFileName,
bool OutputIsAsm) const {
+ auto Pos = SubArchName.find_first_of("+-");
+ if (Pos != SubArchName.npos)
+ SubArchName = SubArchName.substr(0, Pos);
// Construct llc command.
ArgStringList LlcArgs;
// The input to llc is the output from opt.
@@ -281,7 +288,8 @@
Action::OffloadKind DeviceOffloadingKind) const {
HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
- StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
+ // Allow using target id in --offload-arch.
+ StringRef GpuArch = translateTargetId(DriverArgs, CC1Args);
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
(void) GpuArch;
assert(DeviceOffloadingKind == Action::OFK_HIP &&
@@ -289,8 +297,6 @@
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
- CC1Args.push_back("-target-cpu");
- CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
CC1Args.push_back("-fcuda-is-device");
if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
@@ -391,6 +397,7 @@
}
if (!BoundArch.empty()) {
+ DAL->eraseArg(options::OPT_mcpu_EQ);
DAL->eraseArg(options::OPT_march_EQ);
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
}
Index: clang/lib/Driver/ToolChains/AMDGPU.h
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.h
+++ clang/lib/Driver/ToolChains/AMDGPU.h
@@ -221,6 +221,14 @@
static bool isWave64(const llvm::opt::ArgList &DriverArgs,
llvm::AMDGPU::GPUKind Kind);
+
+protected:
+ /// Translate -mcpu option containing target id to cc1 options.
+ /// Returns the GPU name.
+ StringRef translateTargetId(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const;
+
+ StringRef getGPUArch(const llvm::opt::ArgList &DriverArgs) const;
};
class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -9,6 +9,7 @@
#include "AMDGPU.h"
#include "CommonArgs.h"
#include "InputInfo.h"
+#include "clang/Basic/OffloadArch.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "llvm/Option/ArgList.h"
@@ -247,16 +248,22 @@
DerivedArgList *DAL =
Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
- // Do nothing if not OpenCL (-x cl)
- if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
- return DAL;
+ const OptTable &Opts = getDriver().getOpts();
if (!DAL)
DAL = new DerivedArgList(Args.getBaseArgs());
- for (auto *A : Args)
- DAL->append(A);
+ for (auto *A : Args) {
+ // Clang always passes -mcpu to clang -cc1 as -target-cpu. To translate a
+ // target id, we have to pass it through -march instead.
+ if (A->getOption().matches(options::OPT_mcpu_EQ))
+ DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+ Args.getLastArgValue(options::OPT_mcpu_EQ));
+ else
+ DAL->append(A);
+ }
- const OptTable &Opts = getDriver().getOpts();
+ if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
+ return DAL;
// Phase 1 (.cl -> .bc)
if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
@@ -301,7 +308,8 @@
if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
- auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch());
+ auto Kind =
+ llvm::AMDGPU::parseArchAMDGCN(parseOffloadArch(JA.getOffloadingArch()));
if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
options::OPT_fno_cuda_flush_denormals_to_zero,
@@ -311,7 +319,7 @@
return llvm::DenormalMode::getIEEE();
}
- const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
+ const StringRef GpuArch = getGPUArch(DriverArgs);
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
// TODO: There are way too many flags that change this. Do we need to check
@@ -345,6 +353,8 @@
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
+ // Allow using target id in -mcpu.
+ translateTargetId(DriverArgs, CC1Args);
// Default to "hidden" visibility, as object level linking will not be
// supported for the foreseeable future.
if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
@@ -355,6 +365,56 @@
}
}
+StringRef
+AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
+ assert(!DriverArgs.hasArg(options::OPT_mcpu_EQ) &&
+ "-mcpu should have been translated to -march");
+ return parseOffloadArch(DriverArgs.getLastArgValue(options::OPT_march_EQ));
+}
+
+StringRef
+AMDGPUToolChain::translateTargetId(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const {
+ StringRef GpuArch;
+ llvm::StringMap<bool> FeatureMap;
+ assert(!DriverArgs.hasArg(options::OPT_mcpu_EQ) &&
+ "-mcpu should have been translated to -march");
+ StringRef TargetId = DriverArgs.getLastArgValue(options::OPT_march_EQ);
+ if (TargetId.empty())
+ return GpuArch;
+
+ bool IsValid;
+ GpuArch = parseOffloadArch(TargetId, &FeatureMap, &IsValid);
+ if (!IsValid) {
+ getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetId;
+ return GpuArch;
+ }
+
+ if (GpuArch.empty())
+ return GpuArch;
+
+ CC1Args.push_back("-target-cpu");
+ CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
+
+ // Iterate through all possible target id features for the given GPU.
+ // If it is mapped to true, pass -target-feature +feature to clang -cc1.
+ // If it is mapped to false, pass -target-feature -feature to clang -cc1.
+ // If it is not in the map (default), do not pass it to clang -cc1.
+ for (auto Feature : getAllPossibleOffloadArchFeatures(GpuArch)) {
+ auto Pos = FeatureMap.find(Feature);
+ if (Pos == FeatureMap.end())
+ continue;
+ CC1Args.push_back("-target-feature");
+ auto FeatureName = Feature;
+ if (Feature == "sramecc")
+ FeatureName = "sram-ecc";
+ std::string Opt = (Twine(Pos->second ? "+" : "-") + FeatureName).str();
+ CC1Args.push_back(DriverArgs.MakeArgStringRef(Opt));
+ }
+
+ return GpuArch;
+}
+
void ROCMToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
@@ -376,7 +436,7 @@
}
// Get the device name and canonicalize it
- const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
+ const StringRef GpuArch = getGPUArch(DriverArgs);
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -45,6 +45,7 @@
#include "ToolChains/TCE.h"
#include "ToolChains/WebAssembly.h"
#include "ToolChains/XCore.h"
+#include "clang/Basic/OffloadArch.h"
#include "clang/Basic/Version.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
@@ -2355,8 +2356,20 @@
bool EmitLLVM = false;
bool EmitAsm = false;
+ /// Id to identify each device compilation. For CUDA it is simply the
+ /// GPU arch string. For HIP it is either the GPU arch string or GPU
+ /// arch string plus feature strings delimited by a plus sign, e.g.
+ /// gfx906+xnack.
+ struct TargetId {
+ /// Target id string which is persistent throughout the compilation.
+ const char *Id;
+ TargetId(CudaArch Arch) { Id = CudaArchToString(Arch); }
+ TargetId(const char *Id) : Id(Id) {}
+ operator const char *() { return Id; }
+ operator StringRef() { return StringRef(Id); }
+ };
/// List of GPU architectures to use in this compilation.
- SmallVector<CudaArch, 4> GpuArchList;
+ SmallVector<TargetId, 4> GpuArchList;
/// The CUDA actions for the current input.
ActionList CudaDeviceActions;
@@ -2439,7 +2452,7 @@
for (auto Arch : GpuArchList) {
CudaDeviceActions.push_back(UA);
- UA->registerDependentActionInfo(ToolChains[0], CudaArchToString(Arch),
+ UA->registerDependentActionInfo(ToolChains[0], Arch,
AssociatedOffloadKind);
}
return ABRT_Success;
@@ -2450,10 +2463,9 @@
void appendTopLevelActions(ActionList &AL) override {
// Utility to append actions to the top level list.
- auto AddTopLevel = [&](Action *A, CudaArch BoundArch) {
+ auto AddTopLevel = [&](Action *A, TargetId TargetId) {
OffloadAction::DeviceDependences Dep;
- Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch),
- AssociatedOffloadKind);
+ Dep.add(*A, *ToolChains.front(), TargetId, AssociatedOffloadKind);
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
};
@@ -2481,6 +2493,8 @@
CudaDeviceActions.clear();
}
+ virtual bool IsValidOffloadArch(StringRef Arch) const = 0;
+
bool initialize() override {
assert(AssociatedOffloadKind == Action::OFK_Cuda ||
AssociatedOffloadKind == Action::OFK_HIP);
@@ -2528,7 +2542,7 @@
EmitAsm = Args.getLastArg(options::OPT_S);
// Collect all cuda_gpu_arch parameters, removing duplicates.
- std::set<CudaArch> GpuArchs;
+ std::set<StringRef> GpuArchs;
bool Error = false;
for (Arg *A : Args) {
if (!(A->getOption().matches(options::OPT_offload_arch_EQ) ||
@@ -2542,21 +2556,19 @@
GpuArchs.clear();
continue;
}
- CudaArch Arch = StringToCudaArch(ArchStr);
- if (Arch == CudaArch::UNKNOWN) {
- C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+ if (!IsValidOffloadArch(ArchStr)) {
Error = true;
} else if (A->getOption().matches(options::OPT_offload_arch_EQ))
- GpuArchs.insert(Arch);
+ GpuArchs.insert(ArchStr);
else if (A->getOption().matches(options::OPT_no_offload_arch_EQ))
- GpuArchs.erase(Arch);
+ GpuArchs.erase(ArchStr);
else
llvm_unreachable("Unexpected option.");
}
// Collect list of GPUs remaining in the set.
- for (CudaArch Arch : GpuArchs)
- GpuArchList.push_back(Arch);
+ for (auto Arch : GpuArchs)
+ GpuArchList.push_back(Arch.data());
// Default to sm_20 which is the lowest common denominator for
// supported GPUs. sm_20 code should work correctly, if
@@ -2578,6 +2590,15 @@
DefaultCudaArch = CudaArch::SM_20;
}
+ bool IsValidOffloadArch(StringRef ArchStr) const override {
+ CudaArch Arch = StringToCudaArch(ArchStr);
+ if (Arch == CudaArch::UNKNOWN) {
+ C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+ return false;
+ }
+ return true;
+ }
+
ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
@@ -2637,8 +2658,7 @@
for (auto &A : {AssembleAction, BackendAction}) {
OffloadAction::DeviceDependences DDep;
- DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]),
- Action::OFK_Cuda);
+ DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda);
DeviceActions.push_back(
C.MakeAction<OffloadAction>(DDep, A->getType()));
}
@@ -2697,6 +2717,22 @@
bool canUseBundlerUnbundler() const override { return true; }
+ bool IsValidOffloadArch(StringRef IdStr) const override {
+ bool IsValid;
+ const StringRef ArchStr =
+ parseOffloadArch(IdStr, /*FeatureMap=*/nullptr, &IsValid);
+ CudaArch Arch = StringToCudaArch(ArchStr);
+ if (Arch == CudaArch::UNKNOWN) {
+ C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+ return false;
+ }
+ if (!IsValid) {
+ C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr;
+ return false;
+ }
+ return true;
+ };
+
ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
@@ -2738,8 +2774,8 @@
// device arch of the next action being propagated to the above link
// action.
OffloadAction::DeviceDependences DDep;
- DDep.add(*CudaDeviceActions[I], *ToolChains.front(),
- CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
+ DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I],
+ AssociatedOffloadKind);
CudaDeviceActions[I] = C.MakeAction<OffloadAction>(
DDep, CudaDeviceActions[I]->getType());
}
@@ -2795,8 +2831,8 @@
for (auto &LI : DeviceLinkerInputs) {
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
- DA.add(*DeviceLinkAction, *ToolChains[0],
- CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
+ DA.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I],
+ AssociatedOffloadKind);
++I;
}
}
Index: clang/lib/Basic/Targets/AMDGPU.h
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.h
+++ clang/lib/Basic/Targets/AMDGPU.h
@@ -13,6 +13,7 @@
#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
#define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
+#include "clang/Basic/OffloadArch.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "llvm/ADT/StringSet.h"
@@ -41,6 +42,14 @@
llvm::AMDGPU::GPUKind GPUKind;
unsigned GPUFeatures;
+ /// Features set by extended --offload-arch, -mcpu, or -target-cpu options
+ /// by the format of GPU name followed by optional feature name delimited
+ /// by plus or minus sign, e.g. gfx908+xnack-sramecc.
+ /// If the offload arch contains +feature, map it to true.
+ /// If the offload arch contains -feature, map it to false.
+ /// If the offload arch does not contain a feature (default), do not map it.
+ llvm::StringMap<bool> OffloadArchFeatures;
+
bool hasFP64() const {
return getTriple().getArch() == llvm::Triple::amdgcn ||
!!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64);
@@ -356,6 +365,25 @@
void setAuxTarget(const TargetInfo *Aux) override;
bool hasExtIntType() const override { return true; }
+
+  // Record the xnack/sramecc settings found in \p Features (needed for the
+  // target id pre-defined macros); other features are ignored. Returns true.
+  bool handleTargetFeatures(std::vector<std::string> &Features,
+                            DiagnosticsEngine &Diags) override {
+    for (auto &F : Features) {
+      // Each entry is a feature name prefixed with '+' (on) or '-' (off).
+      assert(F.front() == '+' || F.front() == '-');
+      bool IsOn = F.front() == '+';
+      StringRef Name = StringRef(F).drop_front();
+      if (Name == "sram-ecc")
+        Name = "sramecc"; // Target ids spell this feature without the dash.
+      if (Name != "xnack" && Name != "sramecc")
+        continue;
+      assert(OffloadArchFeatures.find(Name) == OffloadArchFeatures.end());
+      OffloadArchFeatures[Name] = IsOn;
+    }
+    return true;
+  }
};
} // namespace targets
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -334,6 +334,22 @@
StringRef CanonName = isAMDGCN(getTriple()) ?
getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
+ if (isAMDGCN(getTriple())) {
+ Builder.defineMacro("__amdgcn_processor__",
+ Twine("\"") + Twine(CanonName) + Twine("\""));
+
+ std::string TargetId = CanonName.str();
+ for (auto F : getAllPossibleOffloadArchFeatures(CanonName)) {
+ auto Loc = OffloadArchFeatures.find(F);
+ if (Loc != OffloadArchFeatures.end()) {
+ Builder.defineMacro(Twine("__amdgcn_") + Twine(F) + Twine("__"),
+ Loc->second ? "1" : "0");
+ TargetId = TargetId + (Loc->second ? "+" : "-") + F.str();
+ }
+ }
+ Builder.defineMacro("__amdgcn_target_id__",
+ Twine("\"") + Twine(TargetId) + Twine("\""));
+ }
}
// TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
Index: clang/lib/Basic/OffloadArch.cpp
===================================================================
--- /dev/null
+++ clang/lib/Basic/OffloadArch.cpp
@@ -0,0 +1,78 @@
+//===--- OffloadArch.cpp - Utilities for parsing offload arch -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/OffloadArch.h"
+
+namespace clang {
+
+const llvm::SmallVector<llvm::StringRef, 4>
+getAllPossibleOffloadArchFeatures(llvm::StringRef Device) {
+ llvm::SmallVector<llvm::StringRef, 4> Ret;
+ if (Device == "gfx902" || Device == "gfx908" || Device == "gfx909" ||
+ Device.startswith("gfx10"))
+ Ret.push_back("xnack");
+ if (Device == "gfx906" || Device == "gfx908" || Device == "gfx909")
+ Ret.push_back("sramecc");
+ return Ret;
+}
+
+llvm::StringRef parseOffloadArch(llvm::StringRef OffloadArch,
+                                 llvm::StringMap<bool> *FeatureMap,
+                                 bool *IsValid) {
+  // Split S at the first occurrence of '+' or '-'. The delimiter found is
+  // stored in Delim; Delim is left untouched when S has no delimiter.
+  auto ParseTargetId = [](llvm::StringRef S, char &Delim) {
+    std::pair<llvm::StringRef, llvm::StringRef> Ret;
+    auto Pos = S.find_first_of("+-");
+    if (Pos == S.npos) {
+      Ret.first = S;
+      return Ret;
+    }
+    Delim = S[Pos];
+    Ret.first = S.substr(0, Pos);
+    Ret.second = S.drop_front(Pos + 1);
+    return Ret;
+  };
+  char Delim = '\0';
+  auto Split = ParseTargetId(OffloadArch, Delim);
+  llvm::StringRef ArchStr = Split.first;
+  if (!FeatureMap && !IsValid)
+    return ArchStr;
+
+  auto Features = Split.second;
+  if (Features.empty()) {
+    if (IsValid)
+      *IsValid = true;
+    return ArchStr;
+  }
+
+  auto AllFeatures = getAllPossibleOffloadArchFeatures(ArchStr);
+  unsigned CurIndex = 0;
+  while (!Features.empty()) {
+    assert(Delim == '+' || Delim == '-');
+    bool IsOn = Delim == '+';
+    auto Splits = ParseTargetId(Features, Delim);
+    while (CurIndex < AllFeatures.size() &&
+           Splits.first != AllFeatures[CurIndex])
+      ++CurIndex;
+    if (CurIndex == AllFeatures.size()) {
+      if (IsValid)
+        *IsValid = false;
+      return ArchStr;
+    }
+    if (FeatureMap)
+      (*FeatureMap)[Splits.first] = IsOn;
+    // Step past the match: features are ordered and must not be repeated,
+    // so e.g. gfx908+xnack+xnack is rejected on the next iteration.
+    ++CurIndex;
+    Features = Splits.second;
+  }
+  if (IsValid)
+    *IsValid = true;
+  return ArchStr;
+}
+} // namespace clang
Index: clang/lib/Basic/CMakeLists.txt
===================================================================
--- clang/lib/Basic/CMakeLists.txt
+++ clang/lib/Basic/CMakeLists.txt
@@ -53,6 +53,7 @@
LangStandards.cpp
Module.cpp
ObjCRuntime.cpp
+ OffloadArch.cpp
OpenMPKinds.cpp
OperatorPrecedence.cpp
SanitizerBlacklist.cpp
Index: clang/include/clang/Basic/OffloadArch.h
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/OffloadArch.h
@@ -0,0 +1,41 @@
+//===--- OffloadArch.h - Utilities for offload arch -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_OFFLOAD_ARCH_H
+#define LLVM_CLANG_BASIC_OFFLOAD_ARCH_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+
+namespace clang {
+
+/// Get all feature strings that can be used in offload arch for \p Device.
+/// Offload arch is a device name with optional offload arch feature strings
+/// delimited by a plus or minus sign, e.g. gfx906+xnack-sramecc. Offload arch
+/// feature itself cannot contain plus or minus sign. Each device has a limited
+/// number of predefined offload arch features which have to follow predefined
+/// order when showing up in an offload arch.
+const llvm::SmallVector<llvm::StringRef, 4>
+getAllPossibleOffloadArchFeatures(llvm::StringRef Device);
+
+/// Parse an offload arch to get GPU arch and feature map.
+/// Returns GPU arch.
+/// Returns offload arch features in \p FeatureMap if it is not null pointer.
+/// This function assumes \p OffloadArch is a valid offload arch.
+/// If the offload arch contains +feature, map it to true.
+/// If the offload arch contains -feature, map it to false.
+/// If the offload arch does not contain a feature (default), do not map it.
+/// Returns whether the offload arch features are valid in \p IsValid if it
+/// is not a null pointer.
+llvm::StringRef parseOffloadArch(llvm::StringRef OffloadArch,
+ llvm::StringMap<bool> *FeatureMap = nullptr,
+ bool *IsValid = nullptr);
+
+} // namespace clang
+
+#endif
Index: clang/include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -73,6 +73,7 @@
InGroup<CudaUnknownVersion>;
def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
+def err_drv_bad_target_id : Error<"Invalid target id: %0">;
def err_drv_invalid_thread_model_for_target : Error<
"invalid thread model '%0' in '%1' for this target">;
def err_drv_invalid_linker_name : Error<
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits