jhuber6 updated this revision to Diff 483576.
jhuber6 added a comment.
Accidentally deleted the old `getInputFilename` routine, which we still need.
The symlink approach worked fine but would break on Windows, so I ended up
writing a hack that only uses `.cubin` when the `nvlink` linker is active and
the input comes from the assembler. It's extremely ugly, so let me know what
you think.
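For reference, the hack boils down to roughly the following check (a simplified
sketch of the `NVPTXToolChain::getInputFilename` change in the diff below, not
the exact code):

  // Only rewrite '.o' -> '.cubin' when the object was produced by our own
  // assembler (ptxas) job and nvlink has been selected as the linker.
  std::string NVPTXToolChain::getInputFilename(const InputInfo &Input) const {
    const Action *A = Input.getAction();
    bool FromAssembler = A && A->getKind() == Action::AssembleJobClass;
    if (Input.getType() != types::TY_Object || !FromAssembler || !NeedsNvlink)
      return ToolChain::getInputFilename(Input);

    SmallString<256> Filename(ToolChain::getInputFilename(Input));
    llvm::sys::path::replace_extension(Filename, "cubin");
    return std::string(Filename.str());
  }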
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D140158/new/
https://reviews.llvm.org/D140158
Files:
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/Cuda.cpp
clang/lib/Driver/ToolChains/Cuda.h
clang/test/Driver/cuda-cross-compiling.c
Index: clang/test/Driver/cuda-cross-compiling.c
===================================================================
--- /dev/null
+++ clang/test/Driver/cuda-cross-compiling.c
@@ -0,0 +1,68 @@
+// Tests the driver when targeting the NVPTX architecture directly without a
+// host toolchain to perform CUDA mappings.
+
+// REQUIRES: nvptx-registered-target
+
+//
+// Test the generated phases when targeting NVPTX.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \
+// RUN: | FileCheck -check-prefix=PHASES %s
+
+// PHASES: 0: input, "[[INPUT:.+]]", c
+// PHASES-NEXT: 1: preprocessor, {0}, cpp-output
+// PHASES-NEXT: 2: compiler, {1}, ir
+// PHASES-NEXT: 3: backend, {2}, assembler
+// PHASES-NEXT: 4: assembler, {3}, object
+// PHASES-NEXT: 5: linker, {4}, image
+
+//
+// Test the generated bindings when targeting NVPTX.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -ccc-print-bindings %s 2>&1 \
+// RUN: | FileCheck -check-prefix=BINDINGS %s
+
+// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX:.+]].s"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX]].s"], output: "[[CUBIN:.+]].o"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN]].o"], output: "a.out"
+
+//
+// Test the generated arguments to the CUDA binary utils when targeting NVPTX.
+// Ensure that the '.o' files are converted to '.cubin' if produced internally.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_61 -### %s 2>&1 \
+// RUN: | FileCheck -check-prefix=ARGS %s
+
+// ARGS: -cc1" "-triple" "nvptx64-nvidia-cuda" "-S" {{.*}} "-target-cpu" "sm_61" "-target-feature" "+ptx{{[0-9]+}}" {{.*}} "-o" "[[PTX:.+]].s"
+// ARGS-NEXT: ptxas" "-m64" "-O0" "--gpu-name" "sm_61" "--output-file" "[[CUBIN:.+]].cubin" "[[PTX]].s" "-c"
+// ARGS-NEXT: nvlink" "-o" "a.out" "-arch" "sm_61" {{.*}} "[[CUBIN]].cubin"
+
+//
+// Test the generated arguments to the CUDA binary utils when targeting NVPTX.
+// Ensure that we emit '.o' files if compiled with '-c'.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_61 -c -### %s 2>&1 \
+// RUN: | FileCheck -check-prefix=OBJECT %s
+
+// OBJECT: -cc1" "-triple" "nvptx64-nvidia-cuda" "-S" {{.*}} "-target-cpu" "sm_61" "-target-feature" "+ptx{{[0-9]+}}" {{.*}} "-o" "[[PTX:.+]].s"
+// OBJECT-NEXT: ptxas" "-m64" "-O0" "--gpu-name" "sm_61" "--output-file" "[[OBJ:.+]].o" "[[PTX]].s" "-c"
+
+//
+// Test the generated arguments to the CUDA binary utils when targeting NVPTX.
+// Ensure that we copy input '.o' files to '.cubin' files when linking.
+//
+// RUN: touch %t.o
+// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_61 -### %t.o 2>&1 \
+// RUN: | FileCheck -check-prefix=LINK %s
+
+// LINK: nvlink" "-o" "a.out" "-arch" "sm_61" {{.*}} "{{.*}}.cubin"
+
+//
+// Test that the generated arguments default to 'sm_35' when no architecture is given.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -### %s 2>&1 \
+// RUN: | FileCheck -check-prefix=DEFAULT %s
+
+// DEFAULT: -cc1" "-triple" "nvptx64-nvidia-cuda" "-S" {{.*}} "-target-cpu" "sm_35" "-target-feature" "+ptx{{[0-9]+}}" {{.*}} "-o" "[[PTX:.+]].s"
+// DEFAULT-NEXT: ptxas" "-m64" "-O0" "--gpu-name" "sm_35" "--output-file" "[[CUBIN:.+]].cubin" "[[PTX]].s" "-c"
+// DEFAULT-NEXT: nvlink" "-o" "a.out" "-arch" "sm_35" {{.*}} "[[CUBIN]].cubin"
Index: clang/lib/Driver/ToolChains/Cuda.h
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.h
+++ clang/lib/Driver/ToolChains/Cuda.h
@@ -98,9 +98,22 @@
// Runs fatbinary, which combines GPU object files ("cubin" files) and/or PTX
// assembly into a single output file.
+class LLVM_LIBRARY_VISIBILITY FatBinary : public Tool {
+ public:
+ FatBinary(const ToolChain &TC) : Tool("NVPTX::Linker", "fatbinary", TC) {}
+
+ bool hasIntegratedCPP() const override { return false; }
+
+ void ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const override;
+};
+
+// Runs nvlink, which links GPU object files ("cubin" files) into a single file.
class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
public:
- Linker(const ToolChain &TC) : Tool("NVPTX::Linker", "fatbinary", TC) {}
+ Linker(const ToolChain &TC) : Tool("NVPTX::Linker", "nvlink", TC) {}
bool hasIntegratedCPP() const override { return false; }
@@ -119,73 +132,102 @@
namespace toolchains {
-class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain {
-public:
- CudaToolChain(const Driver &D, const llvm::Triple &Triple,
- const ToolChain &HostTC, const llvm::opt::ArgList &Args);
+class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
+ public:
+ NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
+ const llvm::Triple &HostTriple,
+ const llvm::opt::ArgList &Args);
- const llvm::Triple *getAuxTriple() const override {
- return &HostTC.getTriple();
- }
+ NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
+ const llvm::opt::ArgList &Args);
+
+ std::string getInputFilename(const InputInfo &Input) const override;
+
+ llvm::opt::DerivedArgList *
+ TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const override;
- std::string getInputFilename(const InputInfo &Input) const override;
-
- llvm::opt::DerivedArgList *
- TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
- Action::OffloadKind DeviceOffloadKind) const override;
- void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args,
- Action::OffloadKind DeviceOffloadKind) const override;
-
- llvm::DenormalMode getDefaultDenormalModeForType(
- const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
- const llvm::fltSemantics *FPType = nullptr) const override;
-
- // Never try to use the integrated assembler with CUDA; always fork out to
- // ptxas.
- bool useIntegratedAs() const override { return false; }
- bool isCrossCompiling() const override { return true; }
- bool isPICDefault() const override { return false; }
- bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
+ // Never try to use the integrated assembler with NVPTX; always fork out to
+ // ptxas.
+ bool useIntegratedAs() const override { return false; }
+ bool isCrossCompiling() const override { return true; }
+ bool isPICDefault() const override { return false; }
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
return false;
- }
- bool isPICDefaultForced() const override { return false; }
- bool SupportsProfiling() const override { return false; }
- bool supportsDebugInfoOption(const llvm::opt::Arg *A) const override;
- void adjustDebugInfoKind(codegenoptions::DebugInfoKind &DebugInfoKind,
- const llvm::opt::ArgList &Args) const override;
- bool IsMathErrnoDefault() const override { return false; }
+ }
+ bool isPICDefaultForced() const override { return false; }
+ bool SupportsProfiling() const override { return false; }
- void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args) const override;
+ bool IsMathErrnoDefault() const override { return false; }
- void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
- CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
- void
- AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args) const override;
- void AddClangCXXStdlibIncludeArgs(
- const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CC1Args) const override;
- void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+ bool supportsDebugInfoOption(const llvm::opt::Arg *A) const override;
+ void adjustDebugInfoKind(codegenoptions::DebugInfoKind &DebugInfoKind,
+ const llvm::opt::ArgList &Args) const override;
+
+ // NVPTX supports only DWARF2.
+ unsigned GetDefaultDwarfVersion() const override { return 2; }
+ unsigned getMaxDwarfVersion() const override { return 2; }
+
+ CudaInstallationDetector CudaInstallation;
+
+ protected:
+ Tool *buildAssembler() const override; // ptxas.
+ Tool *buildLinker() const override; // nvlink.
+ private:
+  // Global state telling us whether we need to emit a '.cubin' or a '.o' file.
+ static inline bool NeedsNvlink;
+};
+
+class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {
+ public:
+ CudaToolChain(const Driver &D, const llvm::Triple &Triple,
+ const ToolChain &HostTC, const llvm::opt::ArgList &Args);
+
+ const llvm::Triple *getAuxTriple() const override {
+ return &HostTC.getTriple();
+ }
+
+ std::string getInputFilename(const InputInfo &Input) const override;
+
+ llvm::opt::DerivedArgList *
+ TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const override;
+ void
+ addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args,
+ Action::OffloadKind DeviceOffloadKind) const override;
+
+ llvm::DenormalMode getDefaultDenormalModeForType(
+ const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
+ const llvm::fltSemantics *FPType = nullptr) const override;
+
+ void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
- SanitizerMask getSupportedSanitizers() const override;
+ void
+ addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
+ CXXStdlibType
+ GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
+ void
+ AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const override;
+ void AddClangCXXStdlibIncludeArgs(
+ const llvm::opt::ArgList &Args,
+ llvm::opt::ArgStringList &CC1Args) const override;
+ void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const override;
- VersionTuple
- computeMSVCVersion(const Driver *D,
- const llvm::opt::ArgList &Args) const override;
+ SanitizerMask getSupportedSanitizers() const override;
- unsigned GetDefaultDwarfVersion() const override { return 2; }
- // NVPTX supports only DWARF2.
- unsigned getMaxDwarfVersion() const override { return 2; }
+ VersionTuple
+ computeMSVCVersion(const Driver *D,
+ const llvm::opt::ArgList &Args) const override;
- const ToolChain &HostTC;
- CudaInstallationDetector CudaInstallation;
+ const ToolChain &HostTC;
-protected:
- Tool *buildAssembler() const override; // ptxas
- Tool *buildLinker() const override; // fatbinary (ok, not really a linker)
+ protected:
+ Tool *buildAssembler() const override; // ptxas
+ Tool *buildLinker() const override; // fatbinary (ok, not really a linker)
};
} // end namespace toolchains
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -380,18 +380,20 @@
const ArgList &Args,
const char *LinkingOutput) const {
const auto &TC =
- static_cast<const toolchains::CudaToolChain &>(getToolChain());
+ static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
assert(TC.getTriple().isNVPTX() && "Wrong platform");
StringRef GPUArchName;
- // If this is an OpenMP action we need to extract the device architecture
- // from the -march=arch option. This option may come from -Xopenmp-target
- // flag or the default value.
- if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
+  // If this is a CUDA action we need to extract the device architecture from
+  // the job's offloading architecture; otherwise we use the -march=arch
+  // option, which may come from the -Xopenmp-target flag or the default
+  // value.
+ if (JA.isDeviceOffloading(Action::OFK_Cuda)) {
+ GPUArchName = JA.getOffloadingArch();
+ } else {
GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
assert(!GPUArchName.empty() && "Must have an architecture passed in.");
- } else
- GPUArchName = JA.getOffloadingArch();
+ }
// Obtain architecture from the action.
CudaArch gpu_arch = StringToCudaArch(GPUArchName);
@@ -462,13 +464,14 @@
for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
CmdArgs.push_back(Args.MakeArgString(A));
- bool Relocatable = false;
+ bool Relocatable = true;
if (JA.isOffloading(Action::OFK_OpenMP))
// In OpenMP we need to generate relocatable code.
Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
options::OPT_fnoopenmp_relocatable_target,
/*Default=*/true);
else if (JA.isOffloading(Action::OFK_Cuda))
+ // In CUDA we generate non-relocatable code by default.
Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
options::OPT_fno_gpu_rdc, /*Default=*/false);
@@ -506,11 +509,11 @@
// All inputs to this linker must be from CudaDeviceActions, as we need to look
// at the Inputs' Actions in order to figure out which GPU architecture they
// correspond to.
-void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
- const InputInfo &Output,
- const InputInfoList &Inputs,
- const ArgList &Args,
- const char *LinkingOutput) const {
+void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
const auto &TC =
static_cast<const toolchains::CudaToolChain &>(getToolChain());
assert(TC.getTriple().isNVPTX() && "Wrong platform");
@@ -557,6 +560,83 @@
Exec, CmdArgs, Inputs, Output));
}
+void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
+ const auto &TC =
+ static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
+ assert(TC.getTriple().isNVPTX() && "Wrong platform");
+
+ ArgStringList CmdArgs;
+ if (Output.isFilename()) {
+ CmdArgs.push_back("-o");
+ CmdArgs.push_back(Output.getFilename());
+ } else {
+ assert(Output.isNothing() && "Invalid output.");
+ }
+
+ if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
+ CmdArgs.push_back("-g");
+
+ if (Args.hasArg(options::OPT_v))
+ CmdArgs.push_back("-v");
+
+ StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
+ assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink.");
+
+ CmdArgs.push_back("-arch");
+ CmdArgs.push_back(Args.MakeArgString(GPUArch));
+
+ // Add paths specified in LIBRARY_PATH environment variable as -L options.
+ addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
+
+ // Add paths for the default clang library path.
+ SmallString<256> DefaultLibPath =
+ llvm::sys::path::parent_path(TC.getDriver().Dir);
+ llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
+ CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
+
+ for (const auto &II : Inputs) {
+ if (II.getType() == types::TY_LLVM_IR || II.getType() == types::TY_LTO_IR ||
+ II.getType() == types::TY_LTO_BC || II.getType() == types::TY_LLVM_BC) {
+ C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
+ << getToolChain().getTripleString();
+ continue;
+ }
+
+ // Currently, we only pass the input files to the linker, we do not pass
+ // any libraries that may be valid only for the host.
+ if (!II.isFilename())
+ continue;
+
+    // The 'nvlink' application performs RDC-mode linking when given a '.o'
+    // file and device linking when given a '.cubin' file. We always want to
+    // perform device linking, so copy any '.o' inputs to '.cubin' files.
+ auto InputFile = getToolChain().getInputFilename(II);
+ if (llvm::sys::path::extension(InputFile) != ".cubin") {
+ const char *CubinF =
+ Args.MakeArgString(getToolChain().getDriver().GetTemporaryPath(
+ llvm::sys::path::stem(InputFile), "cubin"));
+ if (std::error_code EC =
+ llvm::sys::fs::copy_file(InputFile, C.addTempFile(CubinF)))
+ continue;
+
+ CmdArgs.push_back(CubinF);
+ } else {
+ CmdArgs.push_back(Args.MakeArgString(InputFile));
+ }
+ }
+
+ C.addCommand(std::make_unique<Command>(
+ JA, *this,
+ ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
+ "--options-file"},
+ Args.MakeArgString(getToolChain().GetProgramPath("nvlink")), CmdArgs,
+ Inputs, Output));
+}
+
void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args,
std::vector<StringRef> &Features) {
@@ -599,14 +679,13 @@
Features.push_back(PtxFeature);
}
-/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
-/// which isn't properly a linker but nonetheless performs the step of stitching
-/// together object files from the assembler into a single blob.
-
-CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
- const ToolChain &HostTC, const ArgList &Args)
- : ToolChain(D, Triple, Args), HostTC(HostTC),
- CudaInstallation(D, HostTC.getTriple(), Args) {
+/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
+/// operates as a stand-alone version of the NVPTX tools without the host
+/// toolchain.
+NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
+ const llvm::Triple &HostTriple,
+ const ArgList &Args)
+ : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
if (CudaInstallation.isValid()) {
CudaInstallation.WarnIfUnsupportedVersion();
getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
@@ -614,12 +693,22 @@
// Lookup binaries into the driver directory, this is used to
// discover the clang-offload-bundler executable.
getProgramPaths().push_back(getDriver().Dir);
+ NeedsNvlink = false;
}
-std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
- // Only object files are changed, for example assembly files keep their .s
- // extensions. If the user requested device-only compilation don't change it.
- if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly())
+/// We only need the host triple to locate the CUDA binary utilities; if one is
+/// not provided, use the system's default target triple.
+NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
+ const ArgList &Args)
+ : NVPTXToolChain(D, Triple,
+ llvm::Triple(llvm::sys::getDefaultTargetTriple()), Args) {}
+
+std::string NVPTXToolChain::getInputFilename(const InputInfo &Input) const {
+  // We only need to replace the extension if the object came from our own
+  // assembler job and nvlink is active; otherwise nvlink copies it to a cubin.
+ const Action *A = Input.getAction();
+ if (Input.getType() != types::TY_Object || !A ||
+ A->getKind() != Action::AssembleJobClass || !NeedsNvlink)
return ToolChain::getInputFilename(Input);
// Replace extension for object files with cubin because nvlink relies on
@@ -629,9 +718,65 @@
return std::string(Filename.str());
}
+llvm::opt::DerivedArgList *
+NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
+ StringRef BoundArch,
+ Action::OffloadKind DeviceOffloadKind) const {
+ DerivedArgList *DAL =
+ ToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
+ if (!DAL)
+ DAL = new DerivedArgList(Args.getBaseArgs());
+
+ const OptTable &Opts = getDriver().getOpts();
+
+ for (Arg *A : Args)
+ if (!llvm::is_contained(*DAL, A))
+ DAL->append(A);
+
+ if (!DAL->hasArg(options::OPT_march_EQ))
+ DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+ CudaArchToString(CudaArch::CudaDefault));
+
+ return DAL;
+}
+
+bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
+ const Option &O = A->getOption();
+ return (O.matches(options::OPT_gN_Group) &&
+ !O.matches(options::OPT_gmodules)) ||
+ O.matches(options::OPT_g_Flag) ||
+ O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
+ O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
+ O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
+ O.matches(options::OPT_gdwarf_5) ||
+ O.matches(options::OPT_gcolumn_info);
+}
+
+void NVPTXToolChain::adjustDebugInfoKind(
+ codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
+ switch (mustEmitDebugInfo(Args)) {
+ case DisableDebugInfo:
+ DebugInfoKind = codegenoptions::NoDebugInfo;
+ break;
+ case DebugDirectivesOnly:
+ DebugInfoKind = codegenoptions::DebugDirectivesOnly;
+ break;
+ case EmitSameDebugInfoAsHost:
+ // Use same debug info level as the host.
+ break;
+ }
+}
+
+/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
+/// which isn't properly a linker but nonetheless performs the step of stitching
+/// together object files from the assembler into a single blob.
+
+CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
+ const ToolChain &HostTC, const ArgList &Args)
+ : NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC) {}
+
void CudaToolChain::addClangTargetOptions(
- const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args,
+ const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
@@ -708,33 +853,6 @@
return llvm::DenormalMode::getIEEE();
}
-bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
- const Option &O = A->getOption();
- return (O.matches(options::OPT_gN_Group) &&
- !O.matches(options::OPT_gmodules)) ||
- O.matches(options::OPT_g_Flag) ||
- O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
- O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
- O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
- O.matches(options::OPT_gdwarf_5) ||
- O.matches(options::OPT_gcolumn_info);
-}
-
-void CudaToolChain::adjustDebugInfoKind(
- codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
- switch (mustEmitDebugInfo(Args)) {
- case DisableDebugInfo:
- DebugInfoKind = codegenoptions::NoDebugInfo;
- break;
- case DebugDirectivesOnly:
- DebugInfoKind = codegenoptions::DebugDirectivesOnly;
- break;
- case EmitSameDebugInfoAsHost:
- // Use same debug info level as the host.
- break;
- }
-}
-
void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
// Check our CUDA version if we're going to include the CUDA headers.
@@ -747,6 +865,19 @@
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
+std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
+ // Only object files are changed, for example assembly files keep their .s
+ // extensions. If the user requested device-only compilation don't change it.
+ if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly())
+ return ToolChain::getInputFilename(Input);
+
+ // Replace extension for object files with cubin because nvlink relies on
+ // these particular file names.
+ SmallString<256> Filename(ToolChain::getInputFilename(Input));
+ llvm::sys::path::replace_extension(Filename, "cubin");
+ return std::string(Filename.str());
+}
+
llvm::opt::DerivedArgList *
CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
StringRef BoundArch,
@@ -785,12 +916,21 @@
return DAL;
}
+Tool *NVPTXToolChain::buildAssembler() const {
+ return new tools::NVPTX::Assembler(*this);
+}
+
+Tool *NVPTXToolChain::buildLinker() const {
+ NeedsNvlink = true;
+ return new tools::NVPTX::Linker(*this);
+}
+
Tool *CudaToolChain::buildAssembler() const {
return new tools::NVPTX::Assembler(*this);
}
Tool *CudaToolChain::buildLinker() const {
- return new tools::NVPTX::Linker(*this);
+ return new tools::NVPTX::FatBinary(*this);
}
void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -5937,6 +5937,9 @@
case llvm::Triple::Solaris:
TC = std::make_unique<toolchains::Solaris>(*this, Target, Args);
break;
+ case llvm::Triple::CUDA:
+ TC = std::make_unique<toolchains::NVPTXToolChain>(*this, Target, Args);
+ break;
case llvm::Triple::AMDHSA:
TC = std::make_unique<toolchains::ROCMToolChain>(*this, Target, Args);
break;
@@ -6056,11 +6059,6 @@
}
}
- // Intentionally omitted from the switch above: llvm::Triple::CUDA. CUDA
- // compiles always need two toolchains, the CUDA toolchain and the host
- // toolchain. So the only valid way to create a CUDA toolchain is via
- // CreateOffloadingDeviceToolChains.
-
return *TC;
}