gtbercea updated this revision to Diff 159536.
gtbercea marked 3 inline comments as done.
gtbercea added a comment.
- Address comments.
Repository:
rC Clang
https://reviews.llvm.org/D47394
Files:
include/clang/Driver/Action.h
include/clang/Driver/Compilation.h
include/clang/Driver/Options.td
include/clang/Driver/ToolChain.h
lib/Driver/Action.cpp
lib/Driver/Compilation.cpp
lib/Driver/Driver.cpp
lib/Driver/ToolChain.cpp
lib/Driver/ToolChains/Clang.cpp
lib/Driver/ToolChains/Clang.h
lib/Driver/ToolChains/Cuda.cpp
test/Driver/openmp-offload-gpu-linux.c
test/Driver/openmp-offload-gpu.c
test/Driver/openmp-offload.c
Index: test/Driver/openmp-offload.c
===================================================================
--- test/Driver/openmp-offload.c
+++ test/Driver/openmp-offload.c
@@ -480,13 +480,13 @@
// Create host object and bundle.
// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
// CHK-BUJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
-// CHK-BUJOBS: clang-offload-bundler{{.*}}" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=
+// CHK-BUJOBS: clang-offload-bundler{{.*}}" "-type=o"{{.*}}"-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=
// CHK-BUJOBS-SAME: [[RES:[^\\/]+\.o]]" "-inputs={{.*}}[[T1OBJ]],{{.*}}[[T2OBJ]],{{.*}}[[HOSTOBJ]]"
// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "
// CHK-BUJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
// CHK-BUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "
// CHK-BUJOBS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
-// CHK-BUJOBS-ST: clang-offload-bundler{{.*}}" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=
+// CHK-BUJOBS-ST: clang-offload-bundler{{.*}}" "-type=o"{{.*}}"-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=
// CHK-BUJOBS-ST-SAME: [[RES:[^\\/]+\.o]]" "-inputs={{.*}}[[T1OBJ]],{{.*}}[[T2OBJ]],{{.*}}[[HOSTOBJ]]"
/// ###########################################################################
Index: test/Driver/openmp-offload-gpu.c
===================================================================
--- test/Driver/openmp-offload-gpu.c
+++ test/Driver/openmp-offload-gpu.c
@@ -61,7 +61,7 @@
/// Check cubin file generation and bundling
// RUN: %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN: -no-canonical-prefixes -save-temps %s -c 2>&1 \
+// RUN: -no-canonical-prefixes -save-temps %s -c -fopenmp-use-target-bundling 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-CUBIN-BUNDLING %s
// CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
@@ -73,7 +73,7 @@
/// Check cubin file unbundling and usage by nvlink
// RUN: touch %t.o
// RUN: %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
-// RUN: -no-canonical-prefixes -save-temps %t.o 2>&1 \
+// RUN: -no-canonical-prefixes -save-temps %t.o -fopenmp-use-target-bundling 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s
/// Use DAG to ensure that cubin file has been unbundled.
@@ -87,11 +87,11 @@
// RUN: touch %t1.o
// RUN: touch %t2.o
// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
-// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o -fopenmp-use-target-bundling 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
-// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o -fopenmp-use-target-bundling 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
Index: test/Driver/openmp-offload-gpu-linux.c
===================================================================
--- /dev/null
+++ test/Driver/openmp-offload-gpu-linux.c
@@ -0,0 +1,52 @@
+///
+/// Perform driver tests for OpenMP offloading on Linux systems
+///
+
+// UNSUPPORTED: system-windows
+
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: powerpc-registered-target
+// REQUIRES: nvptx-registered-target
+
+/// Check cubin file generation and partial linking with ld
+// RUN: %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN: -no-canonical-prefixes -save-temps %s -c 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-PTXAS-CUBIN-BUNDLING %s
+
+// CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-PTXAS-CUBIN-BUNDLING-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
+// CHK-PTXAS-CUBIN-BUNDLING: fatbinary{{.*}}" "--create=[[FATBIN:.*\.fatbin]]" "
+// CHK-PTXAS-CUBIN-BUNDLING-SAME: --embedded-fatbin=[[FATBINC:.*\.fatbin.c]]" "
+// CHK-PTXAS-CUBIN-BUNDLING-SAME: --cmdline=--compile-only" "--image=profile={{.*}}[[PTX]]" "
+// CHK-PTXAS-CUBIN-BUNDLING-SAME: --image=profile={{.*}}file=[[CUBIN]]" "--cuda" "--device-c"
+// CHK-PTXAS-CUBIN-BUNDLING: clang++{{.*}}" "-c" "-o" "[[HOSTDEV:.*\.o]]"{{.*}}" "[[FATBINC]]" "-D__NV_MODULE_ID=
+// CHK-PTXAS-CUBIN-BUNDLING-NOT: clang-offload-bundler
+// CHK-PTXAS-CUBIN-BUNDLING: ld" "-r" "[[HOSTDEV]]" "{{.*}}.o" "-o" "{{.*}}.o"
+
+/// ###########################################################################
+
+/// Check object file unbundling is not happening when skipping bundler
+// RUN: touch %t.o
+// RUN: %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN: -no-canonical-prefixes -save-temps %t.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s
+
+/// Use DAG to ensure that object file has not been unbundled.
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: nvlink{{.*}}" {{.*}}"[[OBJ:.*\.o]]"
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: ld{{.*}}" {{.*}}"[[OBJ]]"
+
+/// ###########################################################################
+
+/// Check object file generation is not happening when skipping bundler
+// RUN: touch %t1.o
+// RUN: touch %t2.o
+// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
+/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
+
+// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.o" "{{.*}}openmp-offload-{{.*}}.o"
Index: lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -382,7 +382,8 @@
CmdArgs.push_back("--gpu-name");
CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
CmdArgs.push_back("--output-file");
- CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
+ const char *CubinF = Args.MakeArgString(TC.getInputFilename(Output));
+ CmdArgs.push_back(CubinF);
for (const auto& II : Inputs)
CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
@@ -408,6 +409,130 @@
else
Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+
+ // For OpenMP offloading to an NVIDIA device, call the NVIDIA tools that
+ // make the object file discoverable by NVLINK.
+ // Wrap the resulting fatbinary file into a host-friendly object file to
+ // be linked with the host object file.
+ if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
+ Args.hasArg(options::OPT_c) &&
+ C.canSkipOffloadBundler()) {
+ ArgStringList FatbinaryCmdArgs;
+ FatbinaryCmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
+
+ ArgStringList CompilerCmdArgs;
+ CompilerCmdArgs.push_back(Args.MakeArgString("-c"));
+ CompilerCmdArgs.push_back(Args.MakeArgString("-o"));
+ CompilerCmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
+ CompilerCmdArgs.push_back(Args.MakeArgString(llvm::Twine("-I") +
+ TC.CudaInstallation.getBinPath() + llvm::Twine("/../include")));
+
+ // Create fatbin file using fatbinary executable.
+ SmallString<128> OrigOutputFileName =
+ llvm::sys::path::filename(Output.getFilename());
+
+ // Create fatbin file.
+ const char *FatbinF;
+ if (C.getDriver().isSaveTempsEnabled()) {
+ llvm::sys::path::replace_extension(OrigOutputFileName, "fatbin");
+ FatbinF = C.getArgs().MakeArgString(OrigOutputFileName.c_str());
+ } else {
+ llvm::sys::path::replace_extension(OrigOutputFileName, "");
+ OrigOutputFileName =
+ C.getDriver().GetTemporaryPath(OrigOutputFileName, "fatbin");
+ FatbinF =
+ C.addTempFile(C.getArgs().MakeArgString(OrigOutputFileName.c_str()));
+ }
+ FatbinaryCmdArgs.push_back(
+ Args.MakeArgString(llvm::Twine("--create=") + FatbinF));
+
+ // Create fatbin file wrapper using fatbinary executable.
+ const char *WrappedFatbinF;
+ llvm::sys::path::replace_extension(OrigOutputFileName, "fatbin.c");
+ if (C.getDriver().isSaveTempsEnabled())
+ WrappedFatbinF = C.getArgs().MakeArgString(OrigOutputFileName);
+ else
+ WrappedFatbinF =
+ C.addTempFile(C.getArgs().MakeArgString(OrigOutputFileName));
+ FatbinaryCmdArgs.push_back(
+ Args.MakeArgString(llvm::Twine("--embedded-fatbin=") +
+ WrappedFatbinF));
+
+ // Continue assembling the host compiler arguments.
+ CompilerCmdArgs.push_back(Args.MakeArgString(WrappedFatbinF));
+
+ StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
+ assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink.");
+
+ for (const auto& II : Inputs) {
+ SmallString<128> OrigInputFileName =
+ llvm::sys::path::filename(II.getFilename());
+
+ if (II.getType() == types::TY_LLVM_IR ||
+ II.getType() == types::TY_LTO_IR ||
+ II.getType() == types::TY_LTO_BC ||
+ II.getType() == types::TY_LLVM_BC) {
+ C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
+ << getToolChain().getTripleString();
+ continue;
+ }
+
+ // Currently, we only pass the input files to the linker, we do not pass
+ // any libraries that may be valid only for the host. Any static
+ // libraries will be handled at the link stage.
+ if (!II.isFilename() || OrigInputFileName.endswith(".a"))
+ continue;
+
+ auto *A = II.getAction();
+ assert(A->getInputs().size() == 1 &&
+ "Device offload action is expected to have a single input");
+ CudaArch gpu_arch = StringToCudaArch(GPUArch);
+
+ // We need to pass an Arch of the form "sm_XX" for cubin files and
+ // "compute_XX" for ptx.
+ const char *Arch =
+ (II.getType() == types::TY_PP_Asm)
+ ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
+ : GPUArch.str().c_str();
+ const char *PtxF =
+ C.addTempFile(C.getArgs().MakeArgString(II.getFilename()));
+ FatbinaryCmdArgs.push_back("--cmdline=--compile-only");
+ FatbinaryCmdArgs.push_back(
+ Args.MakeArgString(llvm::Twine("--image=profile=") +
+ Arch + ",file=" + PtxF));
+ FatbinaryCmdArgs.push_back(
+ Args.MakeArgString(llvm::Twine("--image=profile=") +
+ GPUArch.str().c_str() + "@" + Arch + ",file=" + CubinF));
+ }
+
+ FatbinaryCmdArgs.push_back(Args.MakeArgString("--cuda"));
+ FatbinaryCmdArgs.push_back(Args.MakeArgString("--device-c"));
+
+ for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
+ FatbinaryCmdArgs.push_back(Args.MakeArgString(A));
+
+ // fatbinary --create=ompprint.fatbin -64
+ // --image=profile=compute_35,file=ompprint.compute_35.ptx
+ // --image=profile=sm_35@compute_35,file=ompprint.compute_35.sm_35.cubin
+ // --embedded-fatbin=ompprint.fatbin.c --cuda --device-c
+ const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
+ C.addCommand(llvm::make_unique<Command>(
+ JA, *this, Exec, FatbinaryCmdArgs, Inputs));
+
+ // Come up with a unique name for the fatbin segment. The name uses
+ // the hash of the output file name (directory components are stripped).
+ std::hash<std::string> HashFn;
+ size_t hash = HashFn(llvm::sys::path::filename(Output.getFilename()));
+ CompilerCmdArgs.push_back(
+ Args.MakeArgString(llvm::Twine("-D__NV_MODULE_ID=") +
+ llvm::Twine(hash)));
+
+ // clang++ -c ompprint.fatbin.c -I/path/to/cuda/include/dir
+ const char *CompilerExec =
+ Args.MakeArgString(TC.GetProgramPath("clang++"));
+ C.addCommand(llvm::make_unique<Command>(
+ JA, *this, CompilerExec, CompilerCmdArgs, Inputs));
+ }
}
static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
@@ -512,6 +637,9 @@
// Add paths specified in LIBRARY_PATH environment variable as -L options.
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
+ if (C.canSkipOffloadBundler())
+ Args.AddAllArgs(CmdArgs, options::OPT_L);
+
// Add paths for the default clang library path.
SmallString<256> DefaultLibPath =
llvm::sys::path::parent_path(TC.getDriver().Dir);
@@ -531,15 +659,37 @@
continue;
}
- // Currently, we only pass the input files to the linker, we do not pass
- // any libraries that may be valid only for the host.
- if (!II.isFilename())
+ if (!II.isFilename()) {
+ // Anything that's not a file name is potentially a static library
+ // so treat it as such.
+ if (C.canSkipOffloadBundler())
+ CmdArgs.push_back(C.getArgs().MakeArgString(llvm::Twine("-l") +
+ II.getInputArg().getValue()));
continue;
+ }
- const char *CubinF = C.addTempFile(
- C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
-
- CmdArgs.push_back(CubinF);
+ StringRef OrigInputFileName =
+ llvm::sys::path::filename(II.getBaseInput());
+ if (OrigInputFileName.endswith(".a")) {
+ const char *StaticLibName =
+ C.getArgs().MakeArgString(II.getFilename());
+ CmdArgs.push_back(StaticLibName);
+ } else {
+ // If the original input is not an object file then it means the
+ // assembly step has actually produced a cubin so we need to
+ // rename it accordingly.
+ if ((!C.canSkipOffloadBundler() && OrigInputFileName.endswith(".o")) ||
+ (C.canSkipOffloadBundler() && !OrigInputFileName.endswith(".o"))) {
+ // Create cubin file name and add it as a temporary file.
+ SmallString<256> Filename(II.getFilename());
+ llvm::sys::path::replace_extension(Filename, "cubin");
+ const char *CubinF = C.addTempFile(
+ C.getArgs().MakeArgString(Filename.str()));
+ CmdArgs.push_back(CubinF);
+ } else {
+ CmdArgs.push_back(II.getFilename());
+ }
+ }
}
AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
Index: lib/Driver/ToolChains/Clang.h
===================================================================
--- lib/Driver/ToolChains/Clang.h
+++ lib/Driver/ToolChains/Clang.h
@@ -147,6 +147,19 @@
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
};
+
+/// Partial linker tool.
+class LLVM_LIBRARY_VISIBILITY PartialLinker final : public Tool {
+public:
+ PartialLinker(const ToolChain &TC)
+ : Tool("PartialLinker", "partial-linker", TC) {}
+
+ bool hasIntegratedCPP() const override { return false; }
+ void ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const override;
+};
} // end namespace tools
} // end namespace driver
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -5538,6 +5538,49 @@
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
+// Begin partial linking
+
+void PartialLinker::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const {
+ // This tool is only expected to be invoked for partial linking jobs.
+ assert(isa<PartialLinkerJobAction>(JA) && "Expecting partial linking job!");
+
+ // The partial linking command line (using ld as example):
+ // ld -r input1.o input2.o -o single-file.o
+ ArgStringList CmdArgs;
+
+ // Ensure conditions are met for doing partial linking instead of bundling.
+ assert(TCArgs.hasArg(options::OPT_c) &&
+ "Can only use partial linking for object file generation.");
+ assert(C.canSkipOffloadBundler() &&
+ "Offload bundler cannot be skipped.");
+
+ // TODO: the assert may be removed once a more elaborate checking is in
+ // place in the Driver.
+ StringRef LinkerName = getToolChain().GetLinkerPath();
+ assert(LinkerName.endswith("/ld") && "Partial linking not supported.");
+
+ // Enable partial linking.
+ CmdArgs.push_back(TCArgs.MakeArgString("-r"));
+
+ // Add input files.
+ for (unsigned I = 0; I < Inputs.size(); ++I) {
+ CmdArgs.push_back(TCArgs.MakeArgString(Inputs[I].getFilename()));
+ }
+
+ // Add output file.
+ CmdArgs.push_back(TCArgs.MakeArgString("-o"));
+ CmdArgs.push_back(TCArgs.MakeArgString(Output.getFilename()));
+
+ // Add partial linker command.
+ C.addCommand(llvm::make_unique<Command>(
+ JA, *this, TCArgs.MakeArgString(getToolChain().GetLinkerPath()),
+ CmdArgs, None));
+}
+
// Begin OffloadBundler
void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
Index: lib/Driver/ToolChain.cpp
===================================================================
--- lib/Driver/ToolChain.cpp
+++ lib/Driver/ToolChain.cpp
@@ -277,6 +277,12 @@
return OffloadBundler.get();
}
+Tool *ToolChain::getPartialLinker() const {
+ if (!PartialLinker)
+ PartialLinker.reset(new tools::PartialLinker(*this));
+ return PartialLinker.get();
+}
+
Tool *ToolChain::getTool(Action::ActionClass AC) const {
switch (AC) {
case Action::AssembleJobClass:
@@ -305,6 +311,10 @@
case Action::OffloadBundlingJobClass:
case Action::OffloadUnbundlingJobClass:
return getOffloadBundler();
+
+ case Action::PartialLinkerJobClass:
+ return getPartialLinker();
+
}
llvm_unreachable("Invalid tool kind.");
Index: lib/Driver/Driver.cpp
===================================================================
--- lib/Driver/Driver.cpp
+++ lib/Driver/Driver.cpp
@@ -2793,7 +2793,8 @@
/// results will be kept in this action builder. Return true if an error was
/// found.
bool addHostDependenceToDeviceActions(Action *&HostAction,
- const Arg *InputArg) {
+ const Arg *InputArg,
+ bool SkipBundler) {
if (!IsValid)
return true;
@@ -2805,7 +2806,8 @@
// the input is not a bundle.
if (CanUseBundler && isa<InputAction>(HostAction) &&
InputArg->getOption().getKind() == llvm::opt::Option::InputClass &&
- !types::isSrcFile(HostAction->getType())) {
+ !types::isSrcFile(HostAction->getType()) &&
+ !SkipBundler) {
auto UnbundlingHostAction =
C.MakeAction<OffloadUnbundlingJobAction>(HostAction);
UnbundlingHostAction->registerDependentActionInfo(
@@ -2842,7 +2844,7 @@
/// function can replace the host action by a bundling action if the
/// programming models allow it.
bool appendTopLevelActions(ActionList &AL, Action *HostAction,
- const Arg *InputArg) {
+ const Arg *InputArg, bool usePartialLinkStep) {
// Get the device actions to be appended.
ActionList OffloadAL;
for (auto *SB : SpecializedBuilders) {
@@ -2860,7 +2862,10 @@
// We expect that the host action was just appended to the action list
// before this method was called.
assert(HostAction == AL.back() && "Host action not in the list??");
- HostAction = C.MakeAction<OffloadBundlingJobAction>(OffloadAL);
+ if (usePartialLinkStep)
+ HostAction = C.MakeAction<PartialLinkerJobAction>(OffloadAL);
+ else
+ HostAction = C.MakeAction<OffloadBundlingJobAction>(OffloadAL);
AL.back() = HostAction;
} else
AL.append(OffloadAL.begin(), OffloadAL.end());
@@ -3007,6 +3012,52 @@
YcArg = YuArg = nullptr;
}
+ // Determine whether the bundler tool can be skipped based on the set
+ // of triples provided to the -fopenmp-targets flag, if it is present.
+ bool CanSkipClangOffloadBundler = false;
+ if (!Args.hasArg(options::OPT_fopenmp_use_target_bundling)) {
+ if (Arg *OpenMPTargets = C.getInputArgs().getLastArg(
+ options::OPT_fopenmp_targets_EQ)) {
+ if (OpenMPTargets->getValues().size() > 0) {
+ unsigned triplesRequiringBundler = 0;
+ for (const char *Val : OpenMPTargets->getValues()) {
+ llvm::Triple TT(Val);
+
+ // If the list of triples contains an invalid triple or
+ // contains a valid non-NVPTX triple then the bundler
+ // cannot be skipped.
+ if (TT.getArch() == llvm::Triple::UnknownArch ||
+ (TT.getArch() != llvm::Triple::UnknownArch &&
+ !TT.isNVPTX())) {
+ triplesRequiringBundler++;
+ }
+ }
+ CanSkipClangOffloadBundler = (triplesRequiringBundler == 0);
+ C.setSkipOffloadBundler(CanSkipClangOffloadBundler);
+ }
+ }
+ }
+
+ // Determine whether a linker which supports partial linking
+ // exists. On linux systems ld provides this functionality, there
+ // may be other linkers that work also.
+ // TODO: test if linker supports partial linking i.e. -r
+ // We know ld does so we will actually check if the linker
+ // is ld instead but this needs to be replaced.
+ bool CanDoPartialLinking = false;
+ if (CanSkipClangOffloadBundler &&
+ C.getInputArgs().hasArg(options::OPT_c)) {
+ // The bundler can be replaced with a partial linking step
+ // only when outputting an object. For all other cases the
+ // fallback solution is the clang-offload-bundler.
+ StringRef LinkerName = C.getDefaultToolChain().GetLinkerPath();
+
+ // TODO: test if linker supports partial linking i.e. -r
+ // We know ld does so we will actually check if the linker
+ // is ld instead but this needs to be replaced.
+ CanDoPartialLinking = LinkerName.endswith("/ld");
+ }
+
// Builder to be used to build offloading actions.
OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
@@ -3082,7 +3133,13 @@
// Use the current host action in any of the offloading actions, if
// required.
- if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
+ // The action may contain a bundling step which should not be executed
+ // if the toolchain we are targeting can produce object files that
+ // are understood by the host linker.
+ bool SkipBundler = (InputType == types::TY_Object) &&
+ CanSkipClangOffloadBundler;
+ if (OffloadBuilder.addHostDependenceToDeviceActions(
+ Current, InputArg, SkipBundler))
break;
for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
@@ -3118,7 +3175,8 @@
// Use the current host action in any of the offloading actions, if
// required.
- if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
+ if (OffloadBuilder.addHostDependenceToDeviceActions(
+ Current, InputArg, SkipBundler))
break;
if (Current->getType() == types::TY_Nothing)
@@ -3130,7 +3188,8 @@
Actions.push_back(Current);
// Add any top level actions generated for offloading.
- OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg);
+ OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg,
+ CanDoPartialLinking);
}
// Add a link action if necessary.
@@ -3680,6 +3739,7 @@
InputInfoList OffloadDependencesInputInfo;
bool BuildingForOffloadDevice = TargetDeviceOffloadKind != Action::OFK_None;
+
if (const OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
// The 'Darwin' toolchain is initialized only when its arguments are
// computed. Get the default arguments for OFK_None to ensure that
Index: lib/Driver/Compilation.cpp
===================================================================
--- lib/Driver/Compilation.cpp
+++ lib/Driver/Compilation.cpp
@@ -283,3 +283,11 @@
void Compilation::Redirect(ArrayRef<Optional<StringRef>> Redirects) {
this->Redirects = Redirects;
}
+
+void Compilation::setSkipOffloadBundler(bool skipBundler) {
+ SkipOffloadBundler = skipBundler;
+}
+
+bool Compilation::canSkipOffloadBundler() const {
+ return SkipOffloadBundler;
+}
Index: lib/Driver/Action.cpp
===================================================================
--- lib/Driver/Action.cpp
+++ lib/Driver/Action.cpp
@@ -40,6 +40,8 @@
return "clang-offload-bundler";
case OffloadUnbundlingJobClass:
return "clang-offload-unbundler";
+ case PartialLinkerJobClass:
+ return "partial-linker";
}
llvm_unreachable("invalid class");
@@ -388,3 +390,8 @@
OffloadUnbundlingJobAction::OffloadUnbundlingJobAction(Action *Input)
: JobAction(OffloadUnbundlingJobClass, Input, Input->getType()) {}
+
+void PartialLinkerJobAction::anchor() {}
+
+PartialLinkerJobAction::PartialLinkerJobAction(ActionList &Inputs)
+ : JobAction(PartialLinkerJobClass, Inputs, Inputs.front()->getType()) {}
Index: include/clang/Driver/ToolChain.h
===================================================================
--- include/clang/Driver/ToolChain.h
+++ include/clang/Driver/ToolChain.h
@@ -126,12 +126,14 @@
mutable std::unique_ptr<Tool> Assemble;
mutable std::unique_ptr<Tool> Link;
mutable std::unique_ptr<Tool> OffloadBundler;
+ mutable std::unique_ptr<Tool> PartialLinker;
Tool *getClang() const;
Tool *getAssemble() const;
Tool *getLink() const;
Tool *getClangAs() const;
Tool *getOffloadBundler() const;
+ Tool *getPartialLinker() const;
mutable std::unique_ptr<SanitizerArgs> SanitizerArguments;
mutable std::unique_ptr<XRayArgs> XRayArguments;
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -1521,6 +1521,10 @@
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>,
Flags<[NoArgumentUnused, HelpHidden]>;
+def fopenmp_use_target_bundling : Flag<["-"], "fopenmp-use-target-bundling">, Group<f_Group>,
+ Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fno_openmp_use_target_bundling : Flag<["-"], "fno-openmp-use-target-bundling">, Group<f_Group>,
+ Flags<[NoArgumentUnused, HelpHidden]>;
def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
def fno_escaping_block_tail_calls : Flag<["-"], "fno-escaping-block-tail-calls">, Group<f_Group>, Flags<[CC1Option]>;
Index: include/clang/Driver/Compilation.h
===================================================================
--- include/clang/Driver/Compilation.h
+++ include/clang/Driver/Compilation.h
@@ -125,6 +125,9 @@
/// Whether to keep temporary files regardless of -save-temps.
bool ForceKeepTempFiles = false;
+ /// Whether the clang-offload-bundler can be skipped.
+ bool SkipOffloadBundler = false;
+
public:
Compilation(const Driver &D, const ToolChain &DefaultToolChain,
llvm::opt::InputArgList *Args,
@@ -304,6 +307,16 @@
/// of three. The inferior process's stdin(0), stdout(1), and stderr(2) will
/// be redirected to the corresponding paths, if provided (not llvm::None).
void Redirect(ArrayRef<Optional<StringRef>> Redirects);
+
+ /// Set whether the compilation can avoid calling the clang-offload-bundler
+ /// for object file types.
+ ///
+ /// \param skipBundler - bool value set once by the driver.
+ void setSkipOffloadBundler(bool skipBundler);
+
+ /// Returns true when calls to the clang-offload-bundler are not required
+ /// for object types.
+ bool canSkipOffloadBundler() const;
};
} // namespace driver
Index: include/clang/Driver/Action.h
===================================================================
--- include/clang/Driver/Action.h
+++ include/clang/Driver/Action.h
@@ -71,9 +71,10 @@
VerifyPCHJobClass,
OffloadBundlingJobClass,
OffloadUnbundlingJobClass,
+ PartialLinkerJobClass,
JobClassFirst = PreprocessJobClass,
- JobClassLast = OffloadUnbundlingJobClass
+ JobClassLast = PartialLinkerJobClass
};
// The offloading kind determines if this action is binded to a particular
@@ -589,6 +590,18 @@
}
};
+class PartialLinkerJobAction : public JobAction {
+ void anchor() override;
+
+public:
+ // Partial linking does not change the type of output.
+ PartialLinkerJobAction(ActionList &Inputs);
+
+ static bool classof(const Action *A) {
+ return A->getKind() == PartialLinkerJobClass;
+ }
+};
+
} // namespace driver
} // namespace clang
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits