gtbercea updated this revision to Diff 148677.
Repository:
rC Clang
https://reviews.llvm.org/D47394
Files:
include/clang/Driver/Action.h
include/clang/Driver/Compilation.h
include/clang/Driver/Driver.h
include/clang/Driver/ToolChain.h
lib/Driver/Action.cpp
lib/Driver/Compilation.cpp
lib/Driver/Driver.cpp
lib/Driver/ToolChain.cpp
lib/Driver/ToolChains/Clang.cpp
lib/Driver/ToolChains/Clang.h
lib/Driver/ToolChains/Cuda.cpp
test/Driver/openmp-offload-gpu.c
test/Driver/openmp-offload.c
Index: test/Driver/openmp-offload.c
===================================================================
--- test/Driver/openmp-offload.c
+++ test/Driver/openmp-offload.c
@@ -480,13 +480,13 @@
// Create host object and bundle.
// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
// CHK-BUJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
-// CHK-BUJOBS: clang-offload-bundler{{.*}}" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=
+// CHK-BUJOBS: clang-offload-bundler{{.*}}" "-type=o"{{.*}}"-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=
// CHK-BUJOBS-SAME: [[RES:[^\\/]+\.o]]" "-inputs={{.*}}[[T1OBJ]],{{.*}}[[T2OBJ]],{{.*}}[[HOSTOBJ]]"
// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "
// CHK-BUJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
// CHK-BUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "
// CHK-BUJOBS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
-// CHK-BUJOBS-ST: clang-offload-bundler{{.*}}" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=
+// CHK-BUJOBS-ST: clang-offload-bundler{{.*}}" "-type=o"{{.*}}"-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=
// CHK-BUJOBS-ST-SAME: [[RES:[^\\/]+\.o]]" "-inputs={{.*}}[[T1OBJ]],{{.*}}[[T2OBJ]],{{.*}}[[HOSTOBJ]]"
/// ###########################################################################
Index: test/Driver/openmp-offload-gpu.c
===================================================================
--- test/Driver/openmp-offload-gpu.c
+++ test/Driver/openmp-offload-gpu.c
@@ -66,24 +66,29 @@
// CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
// CHK-PTXAS-CUBIN-BUNDLING-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
-// CHK-PTXAS-CUBIN-BUNDLING: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-inputs={{.*}}[[CUBIN]]
+// CHK-PTXAS-CUBIN-BUNDLING: fatbinary{{.*}}" "--create=[[FATBIN:.*\.fatbin]]" "
+// CHK-PTXAS-CUBIN-BUNDLING-SAME: --embedded-fatbin=[[FATBINC:.*\.fatbin.c]]" "
+// CHK-PTXAS-CUBIN-BUNDLING-SAME: --cmdline=--compile-only" "--image=profile={{.*}}[[PTX]]" "
+// CHK-PTXAS-CUBIN-BUNDLING-SAME: --image=profile={{.*}}file=[[CUBIN]]" "--cuda" "--device-c"
+// CHK-PTXAS-CUBIN-BUNDLING: clang++{{.*}}" "-c" "-o" "[[HOSTDEV:.*\.o]]"{{.*}}" "[[FATBINC]]" "-D__NV_MODULE_ID=
+// CHK-PTXAS-CUBIN-BUNDLING-NOT: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-inputs={{.*}}[[CUBIN]]
+// CHK-PTXAS-CUBIN-BUNDLING: ld" "-r" "[[HOSTDEV]]" "{{.*}}.o" "-o" "{{.*}}.o"
/// ###########################################################################
-/// Check cubin file unbundling and usage by nvlink
+/// Check object file unbundling is not happening when skipping bundler
// RUN: touch %t.o
// RUN: %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -no-canonical-prefixes -save-temps %t.o 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s
-/// Use DAG to ensure that cubin file has been unbundled.
-// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: nvlink{{.*}}" {{.*}}"[[CUBIN:.*\.cubin]]"
-// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-outputs={{.*}}[[CUBIN]]
-// CHK-CUBIN-UNBUNDLING-NVLINK-DAG-SAME: "-unbundle"
+/// Use DAG to ensure that object file has not been unbundled.
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: nvlink{{.*}}" {{.*}}"[[OBJ:.*\.o]]"
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: ld{{.*}}" {{.*}}"[[OBJ]]"
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink
+/// Check object file generation is not happening when skipping bundler
// RUN: touch %t1.o
// RUN: touch %t2.o
// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
@@ -94,7 +99,7 @@
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
-// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
+// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.o" "{{.*}}openmp-offload-{{.*}}.o"
/// ###########################################################################
Index: lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -382,7 +382,8 @@
CmdArgs.push_back("--gpu-name");
CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
CmdArgs.push_back("--output-file");
- CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
+ const char *CubinF = Args.MakeArgString(TC.getInputFilename(Output));
+ CmdArgs.push_back(CubinF);
for (const auto& II : Inputs)
CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
@@ -408,6 +409,130 @@
else
Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+
+ // For OpenMP offloading to an NVIDIA device, call the
+ // NVIDIA tools that make the object file discoverable by NVLINK.
+ // Wrap the resulting fatbinary file into a host-friendly object file to
+ // be linked with the host object file.
+ if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
+ Args.hasArg(options::OPT_c) &&
+ C.canSkipOffloadBundler()) {
+ ArgStringList FatbinaryCmdArgs;
+ FatbinaryCmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
+
+ ArgStringList CompilerCmdArgs;
+ CompilerCmdArgs.push_back(Args.MakeArgString("-c"));
+ CompilerCmdArgs.push_back(Args.MakeArgString("-o"));
+ CompilerCmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
+ CompilerCmdArgs.push_back(Args.MakeArgString(llvm::Twine("-I") +
+ TC.CudaInstallation.getBinPath() + llvm::Twine("/../include")));
+
+ // Create fatbin file using fatbinary executable.
+ SmallString<128> OrigOutputFileName =
+ llvm::sys::path::filename(Output.getFilename());
+
+ // Create fatbin file.
+ const char *FatbinF;
+ if (C.getDriver().isSaveTempsEnabled()) {
+ llvm::sys::path::replace_extension(OrigOutputFileName, "fatbin");
+ FatbinF = C.getArgs().MakeArgString(OrigOutputFileName.c_str());
+ } else {
+ llvm::sys::path::replace_extension(OrigOutputFileName, "");
+ OrigOutputFileName =
+ C.getDriver().GetTemporaryPath(OrigOutputFileName, "fatbin");
+ FatbinF =
+ C.addTempFile(C.getArgs().MakeArgString(OrigOutputFileName.c_str()));
+ }
+ FatbinaryCmdArgs.push_back(
+ Args.MakeArgString(llvm::Twine("--create=") + FatbinF));
+
+ // Create fatbin file wrapper using fatbinary executable.
+ const char *WrappedFatbinF;
+ llvm::sys::path::replace_extension(OrigOutputFileName, "fatbin.c");
+ if (C.getDriver().isSaveTempsEnabled())
+ WrappedFatbinF = C.getArgs().MakeArgString(OrigOutputFileName);
+ else
+ WrappedFatbinF =
+ C.addTempFile(C.getArgs().MakeArgString(OrigOutputFileName));
+ FatbinaryCmdArgs.push_back(
+ Args.MakeArgString(llvm::Twine("--embedded-fatbin=") +
+ WrappedFatbinF));
+
+ // Continue assembling the host compiler arguments.
+ CompilerCmdArgs.push_back(Args.MakeArgString(WrappedFatbinF));
+
+ StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
+ assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink.");
+
+    // Intern the real architecture name (e.g. "sm_35") once so the pointer
+    // stays valid for every use below. Taking `GPUArch.str().c_str()`
+    // instead yields a pointer into a temporary std::string that is
+    // destroyed at the end of the declaration that captured it.
+    const char *RealArch = Args.MakeArgString(GPUArch);
+
+    // Add one PTX image and one cubin image per eligible input file.
+    for (const auto &II : Inputs) {
+      // LLVM IR/bitcode inputs cannot be consumed here: diagnose and skip.
+      if (II.getType() == types::TY_LLVM_IR ||
+          II.getType() == types::TY_LTO_IR ||
+          II.getType() == types::TY_LTO_BC ||
+          II.getType() == types::TY_LLVM_BC) {
+        C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
+            << getToolChain().getTripleString();
+        continue;
+      }
+
+      // Currently, we only pass the input files to the linker, we do not pass
+      // any libraries that may be valid only for the host. Any static
+      // libraries will be handled at the link stage.
+      // isFilename() is tested before getFilename() is called, since
+      // getFilename() is only meaningful for filename inputs.
+      if (!II.isFilename() ||
+          llvm::sys::path::filename(II.getFilename()).endswith(".a"))
+        continue;
+
+      auto *A = II.getAction();
+      assert(A->getInputs().size() == 1 &&
+             "Device offload action is expected to have a single input");
+      (void)A; // Only referenced by the assert above.
+      CudaArch gpu_arch = StringToCudaArch(GPUArch);
+
+      // We need to pass an Arch of the form "sm_XX" for cubin files and
+      // "compute_XX" for ptx.
+      const char *Arch =
+          (II.getType() == types::TY_PP_Asm)
+              ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
+              : RealArch;
+      const char *PtxF =
+          C.addTempFile(C.getArgs().MakeArgString(II.getFilename()));
+      FatbinaryCmdArgs.push_back("--cmdline=--compile-only");
+      // PTX image: --image=profile=<Arch>,file=<ptx>
+      FatbinaryCmdArgs.push_back(
+          Args.MakeArgString(llvm::Twine("--image=profile=") +
+                             Arch + ",file=" + PtxF));
+      // Cubin image: --image=profile=<real arch>@<Arch>,file=<cubin>
+      FatbinaryCmdArgs.push_back(
+          Args.MakeArgString(llvm::Twine("--image=profile=") +
+                             RealArch + "@" + Arch + ",file=" + CubinF));
+    }
+
+ FatbinaryCmdArgs.push_back(Args.MakeArgString("--cuda"));
+ FatbinaryCmdArgs.push_back(Args.MakeArgString("--device-c"));
+
+ for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
+ FatbinaryCmdArgs.push_back(Args.MakeArgString(A));
+
+ // fatbinary --create=ompprint.fatbin -64
+ // --image=profile=compute_35,file=ompprint.compute_35.ptx
+ // --image=profile=sm_35@compute_35,file=ompprint.compute_35.sm_35.cubin
+ // --embedded-fatbin=ompprint.fatbin.c --cuda --device-c
+ const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
+ C.addCommand(llvm::make_unique<Command>(
+ JA, *this, Exec, FatbinaryCmdArgs, Inputs));
+
+ // Come up with a unique name for the fatbin segment. The name uses
+ // the hash of the output file name (without its directory).
+ std::hash<std::string> hash_fn;
+ size_t hash = hash_fn(llvm::sys::path::filename(Output.getFilename()));
+ CompilerCmdArgs.push_back(
+ Args.MakeArgString(llvm::Twine("-D__NV_MODULE_ID=") +
+ llvm::Twine(hash)));
+
+ // clang++ -c ompprint.fatbin.c -I/path/to/cuda/include/dir
+ const char *CompilerExec =
+ Args.MakeArgString(TC.GetProgramPath("clang++"));
+ C.addCommand(llvm::make_unique<Command>(
+ JA, *this, CompilerExec, CompilerCmdArgs, Inputs));
+ }
}
static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
@@ -512,6 +637,9 @@
// Add paths specified in LIBRARY_PATH environment variable as -L options.
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
+ if (C.canSkipOffloadBundler())
+ Args.AddAllArgs(CmdArgs, options::OPT_L);
+
// Add paths for the default clang library path.
SmallString<256> DefaultLibPath =
llvm::sys::path::parent_path(TC.getDriver().Dir);
@@ -531,15 +659,36 @@
continue;
}
- // Currently, we only pass the input files to the linker, we do not pass
- // any libraries that may be valid only for the host.
- if (!II.isFilename())
+ if (!II.isFilename()) {
+ // Anything that's not a file name is potentially a static library
+ // so treat it as such.
+ if (C.canSkipOffloadBundler())
+ CmdArgs.push_back(C.getArgs().MakeArgString(llvm::Twine("-l") +
+ II.getInputArg().getValue()));
continue;
+ }
- const char *CubinF = C.addTempFile(
- C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
-
- CmdArgs.push_back(CubinF);
+ StringRef OrigInputFileName =
+ llvm::sys::path::filename(II.getBaseInput());
+ if (OrigInputFileName.endswith(".a")) {
+ const char *StaticLibName =
+ C.getArgs().MakeArgString(II.getFilename());
+ CmdArgs.push_back(StaticLibName);
+ } else {
+ // If the original input is not an object file then it means the
+ // assembly step has actually produced a cubin so we need to
+ // rename it accordingly.
+ if (!OrigInputFileName.endswith(".o")) {
+ // Create cubin file name and add it as a temporary file.
+ SmallString<256> Filename(II.getFilename());
+ llvm::sys::path::replace_extension(Filename, "cubin");
+ const char *CubinF = C.addTempFile(
+ C.getArgs().MakeArgString(Filename.str()));
+ CmdArgs.push_back(CubinF);
+ } else {
+ CmdArgs.push_back(II.getFilename());
+ }
+ }
}
AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
Index: lib/Driver/ToolChains/Clang.h
===================================================================
--- lib/Driver/ToolChains/Clang.h
+++ lib/Driver/ToolChains/Clang.h
@@ -147,6 +147,19 @@
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
};
+
+/// Partial linker tool.
+///
+/// Driver tool registered under the name "PartialLinker" (short name
+/// "partial-linker"). ConstructJob builds a relocatable link command of
+/// the form `<linker> -r input1.o input2.o -o single-file.o`; it is the
+/// tool selected for PartialLinkerJobAction, which the driver creates in
+/// place of the offload bundling action when partial linking is possible.
+class LLVM_LIBRARY_VISIBILITY PartialLinker final : public Tool {
+public:
+ PartialLinker(const ToolChain &TC)
+ : Tool("PartialLinker", "partial-linker", TC) {}
+
+ // This tool never runs the preprocessor itself.
+ bool hasIntegratedCPP() const override { return false; }
+ // Emits the partial-link command for the given inputs and output.
+ void ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const override;
+};
} // end namespace tools
} // end namespace driver
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -5478,6 +5478,49 @@
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
+// Begin partial linking
+
+/// Build the partial-link job that merges the host object and the
+/// device-wrapping object(s) into a single relocatable object file.
+///
+/// The emitted command line has the form (using ld as example):
+///   ld -r input1.o input2.o -o single-file.o
+///
+/// \param C - The compilation the command is added to.
+/// \param JA - The job action; must be a PartialLinkerJobAction.
+/// \param Output - The single relocatable object to produce.
+/// \param Inputs - The object files to combine, passed in order.
+/// \param TCArgs - Tool chain arguments; also own the argument strings.
+/// \param LinkingOutput - Unused by this tool.
+void PartialLinker::ConstructJob(Compilation &C, const JobAction &JA,
+                                 const InputInfo &Output,
+                                 const InputInfoList &Inputs,
+                                 const llvm::opt::ArgList &TCArgs,
+                                 const char *LinkingOutput) const {
+  assert(isa<PartialLinkerJobAction>(JA) && "Expecting partial linking job!");
+
+  // Ensure conditions are met for doing partial linking instead of bundling.
+  assert(TCArgs.hasArg(options::OPT_c) &&
+         "Can only use partial linking for object file generation.");
+  assert(C.canSkipOffloadBundler() &&
+         "Offload bundler cannot be skipped.");
+
+  // Hold the linker path in an owning std::string and query it only once.
+  // Binding a StringRef directly to the result of GetLinkerPath() leaves a
+  // dangling reference when the path is returned by value.
+  const std::string LinkerPath = getToolChain().GetLinkerPath();
+
+  // TODO: the assert may be removed once a more elaborate checking is in
+  // place in the Driver.
+  assert(StringRef(LinkerPath).endswith("/ld") &&
+         "Partial linking not supported.");
+
+  ArgStringList CmdArgs;
+
+  // Enable partial linking.
+  CmdArgs.push_back("-r");
+
+  // Add input files.
+  for (const InputInfo &II : Inputs)
+    CmdArgs.push_back(TCArgs.MakeArgString(II.getFilename()));
+
+  // Add output file.
+  CmdArgs.push_back("-o");
+  CmdArgs.push_back(TCArgs.MakeArgString(Output.getFilename()));
+
+  // Add partial linker command.
+  C.addCommand(llvm::make_unique<Command>(
+      JA, *this, TCArgs.MakeArgString(LinkerPath), CmdArgs, None));
+}
+
// Begin OffloadBundler
void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
Index: lib/Driver/ToolChain.cpp
===================================================================
--- lib/Driver/ToolChain.cpp
+++ lib/Driver/ToolChain.cpp
@@ -272,6 +272,12 @@
return OffloadBundler.get();
}
+// Return the partial linker tool for this tool chain, creating it on the
+// first request and handing back the same instance afterwards.
+Tool *ToolChain::getPartialLinker() const {
+  if (Tool *Existing = PartialLinker.get())
+    return Existing;
+
+  PartialLinker.reset(new tools::PartialLinker(*this));
+  return PartialLinker.get();
+}
+
Tool *ToolChain::getTool(Action::ActionClass AC) const {
switch (AC) {
case Action::AssembleJobClass:
@@ -300,6 +306,10 @@
case Action::OffloadBundlingJobClass:
case Action::OffloadUnbundlingJobClass:
return getOffloadBundler();
+
+ case Action::PartialLinkerJobClass:
+ return getPartialLinker();
+
}
llvm_unreachable("Invalid tool kind.");
@@ -553,7 +563,7 @@
StringRef Suffix =
tools::arm::getLLVMArchSuffixForARM(CPU, MArch, Triple);
bool IsMProfile = ARM::parseArchProfile(Suffix) == ARM::ProfileKind::M;
- bool ThumbDefault = IsMProfile || (ARM::parseArchVersion(Suffix) == 7 &&
+ bool ThumbDefault = IsMProfile || (ARM::parseArchVersion(Suffix) == 7 &&
getTriple().isOSBinFormatMachO());
// FIXME: this is invalid for WindowsCE
if (getTriple().isOSWindows())
Index: lib/Driver/Driver.cpp
===================================================================
--- lib/Driver/Driver.cpp
+++ lib/Driver/Driver.cpp
@@ -2683,7 +2683,8 @@
/// results will be kept in this action builder. Return true if an error was
/// found.
bool addHostDependenceToDeviceActions(Action *&HostAction,
- const Arg *InputArg) {
+ const Arg *InputArg,
+ bool skipBundler) {
if (!IsValid)
return true;
@@ -2695,7 +2696,8 @@
// the input is not a bundle.
if (CanUseBundler && isa<InputAction>(HostAction) &&
InputArg->getOption().getKind() == llvm::opt::Option::InputClass &&
- !types::isSrcFile(HostAction->getType())) {
+ !types::isSrcFile(HostAction->getType()) &&
+ !skipBundler) {
auto UnbundlingHostAction =
C.MakeAction<OffloadUnbundlingJobAction>(HostAction);
UnbundlingHostAction->registerDependentActionInfo(
@@ -2732,7 +2734,7 @@
/// function can replace the host action by a bundling action if the
/// programming models allow it.
bool appendTopLevelActions(ActionList &AL, Action *HostAction,
- const Arg *InputArg) {
+ const Arg *InputArg, bool usePartialLinkStep) {
// Get the device actions to be appended.
ActionList OffloadAL;
for (auto *SB : SpecializedBuilders) {
@@ -2750,7 +2752,10 @@
// We expect that the host action was just appended to the action list
// before this method was called.
assert(HostAction == AL.back() && "Host action not in the list??");
- HostAction = C.MakeAction<OffloadBundlingJobAction>(OffloadAL);
+ if (usePartialLinkStep)
+ HostAction = C.MakeAction<PartialLinkerJobAction>(OffloadAL);
+ else
+ HostAction = C.MakeAction<OffloadBundlingJobAction>(OffloadAL);
AL.back() = HostAction;
} else
AL.append(OffloadAL.begin(), OffloadAL.end());
@@ -2913,6 +2918,50 @@
YcArg = YuArg = nullptr;
}
+  // Decide whether clang-offload-bundler can be bypassed. This is only
+  // evaluated when -fopenmp-targets= is present with at least one value;
+  // the bundler can be skipped when none of the listed triples needs it.
+  bool canSkipClangOffloadBundler = false;
+  if (Arg *OpenMPTargets = C.getInputArgs().getLastArg(
+          options::OPT_fopenmp_targets_EQ)) {
+    if (!OpenMPTargets->getValues().empty()) {
+      bool anyTripleNeedsBundler = false;
+      for (const char *Val : OpenMPTargets->getValues()) {
+        const llvm::Triple TT(Val);
+
+        // If the list of triples contains an invalid triple or
+        // contains a valid non-NVPTX triple then the bundler
+        // is still required.
+        const bool isInvalid = TT.getArch() == llvm::Triple::UnknownArch;
+        if (isInvalid || !TT.isNVPTX())
+          anyTripleNeedsBundler = true;
+      }
+      canSkipClangOffloadBundler = !anyTripleNeedsBundler;
+      C.setSkipOffloadBundler(canSkipClangOffloadBundler);
+    }
+  }
+
+  // Determine whether a linker which supports partial linking
+  // exists. On Linux systems ld provides this functionality; there
+  // may be other linkers that work as well.
+  // TODO: test if linker supports partial linking i.e. -r
+  // We know ld does so we will actually check if the linker
+  // is ld instead but this needs to be replaced.
+  bool canDoPartialLinking = false;
+  if (canSkipClangOffloadBundler &&
+      C.getInputArgs().hasArg(options::OPT_c)) {
+    // The bundler can be replaced with a partial linking step
+    // only when outputting an object. For all other cases the
+    // fallback solution is the clang-offload-bundler.
+    //
+    // Keep the path in an owning std::string: binding a StringRef directly
+    // to the result of GetLinkerPath() leaves a dangling reference when the
+    // path is returned by value, and endswith() would then read freed memory.
+    const std::string LinkerPath = C.getDefaultToolChain().GetLinkerPath();
+    canDoPartialLinking = StringRef(LinkerPath).endswith("/ld");
+  }
+
+
// Builder to be used to build offloading actions.
OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
@@ -2988,7 +3037,13 @@
// Use the current host action in any of the offloading actions, if
// required.
- if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
+ // The action may contain a bundling step which should not be executed
+ // if the toolchain we are targeting can produce object files that
+ // are understood by the host linker.
+ bool skipBundler = (InputType == types::TY_Object) &&
+ canSkipClangOffloadBundler;
+ if (OffloadBuilder.addHostDependenceToDeviceActions(
+ Current, InputArg, skipBundler))
break;
for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
@@ -3024,7 +3079,8 @@
// Use the current host action in any of the offloading actions, if
// required.
- if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
+ if (OffloadBuilder.addHostDependenceToDeviceActions(
+ Current, InputArg, skipBundler))
break;
if (Current->getType() == types::TY_Nothing)
@@ -3036,7 +3092,8 @@
Actions.push_back(Current);
// Add any top level actions generated for offloading.
- OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg);
+ OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg,
+ canDoPartialLinking);
}
// Add a link action if necessary.
@@ -3586,6 +3643,7 @@
InputInfoList OffloadDependencesInputInfo;
bool BuildingForOffloadDevice = TargetDeviceOffloadKind != Action::OFK_None;
+
if (const OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
// The 'Darwin' toolchain is initialized only when its arguments are
// computed. Get the default arguments for OFK_None to ensure that
Index: lib/Driver/Compilation.cpp
===================================================================
--- lib/Driver/Compilation.cpp
+++ lib/Driver/Compilation.cpp
@@ -271,3 +271,11 @@
void Compilation::Redirect(ArrayRef<Optional<StringRef>> Redirects) {
this->Redirects = Redirects;
}
+
+// Record whether this compilation may bypass clang-offload-bundler.
+// The value is stored as-is in skipOffloadBundler.
+void Compilation::setSkipOffloadBundler(bool skipBundler) {
+ skipOffloadBundler = skipBundler;
+}
+
+// Return the flag last stored by setSkipOffloadBundler().
+bool Compilation::canSkipOffloadBundler() const {
+ return skipOffloadBundler;
+}
Index: lib/Driver/Action.cpp
===================================================================
--- lib/Driver/Action.cpp
+++ lib/Driver/Action.cpp
@@ -40,6 +40,8 @@
return "clang-offload-bundler";
case OffloadUnbundlingJobClass:
return "clang-offload-unbundler";
+ case PartialLinkerJobClass:
+ return "partial-linker";
}
llvm_unreachable("invalid class");
@@ -388,3 +390,8 @@
OffloadUnbundlingJobAction::OffloadUnbundlingJobAction(Action *Input)
: JobAction(OffloadUnbundlingJobClass, Input, Input->getType()) {}
+
+// Out-of-line definition of the virtual anchor() method; intentionally empty.
+void PartialLinkerJobAction::anchor() {}
+
+// Create a partial-link job over Inputs. The action's result type is the
+// type of the first input, so Inputs must not be empty (front() is
+// dereferenced unconditionally).
+PartialLinkerJobAction::PartialLinkerJobAction(ActionList &Inputs)
+ : JobAction(PartialLinkerJobClass, Inputs, Inputs.front()->getType()) {}
Index: include/clang/Driver/ToolChain.h
===================================================================
--- include/clang/Driver/ToolChain.h
+++ include/clang/Driver/ToolChain.h
@@ -126,12 +126,14 @@
mutable std::unique_ptr<Tool> Assemble;
mutable std::unique_ptr<Tool> Link;
mutable std::unique_ptr<Tool> OffloadBundler;
+ mutable std::unique_ptr<Tool> PartialLinker;
Tool *getClang() const;
Tool *getAssemble() const;
Tool *getLink() const;
Tool *getClangAs() const;
Tool *getOffloadBundler() const;
+ Tool *getPartialLinker() const;
mutable std::unique_ptr<SanitizerArgs> SanitizerArguments;
mutable std::unique_ptr<XRayArgs> XRayArguments;
Index: include/clang/Driver/Driver.h
===================================================================
--- include/clang/Driver/Driver.h
+++ include/clang/Driver/Driver.h
@@ -256,7 +256,7 @@
llvm::opt::DerivedArgList *
TranslateInputArgs(const llvm::opt::InputArgList &Args) const;
- // getFinalPhase - Determine which compilation mode we are in and record
+ // getFinalPhase - Determine which compilation mode we are in and record
// which option we used to determine the final phase.
phases::ID getFinalPhase(const llvm::opt::DerivedArgList &DAL,
llvm::opt::Arg **FinalPhaseArg = nullptr) const;
@@ -363,12 +363,12 @@
llvm::opt::InputArgList ParseArgStrings(ArrayRef<const char *> Args,
bool &ContainsError);
- /// BuildInputs - Construct the list of inputs and their types from
+ /// BuildInputs - Construct the list of inputs and their types from
/// the given arguments.
///
/// \param TC - The default host tool chain.
/// \param Args - The input arguments.
- /// \param Inputs - The list to store the resulting compilation
+ /// \param Inputs - The list to store the resulting compilation
/// inputs onto.
void BuildInputs(const ToolChain &TC, llvm::opt::DerivedArgList &Args,
InputList &Inputs) const;
@@ -491,16 +491,16 @@
/// \param JA - The action of interest.
/// \param BaseInput - The original input file that this action was
/// triggered by.
- /// \param BoundArch - The bound architecture.
+ /// \param BoundArch - The bound architecture.
/// \param AtTopLevel - Whether this is a "top-level" action.
/// \param MultipleArchs - Whether multiple -arch options were supplied.
/// \param NormalizedTriple - The normalized triple of the relevant target.
const char *GetNamedOutputPath(Compilation &C, const JobAction &JA,
const char *BaseInput, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs,
StringRef NormalizedTriple) const;
- /// GetTemporaryPath - Return the pathname of a temporary file to use
+ /// GetTemporaryPath - Return the pathname of a temporary file to use
/// as part of compilation; the file will have the given prefix and suffix.
///
/// GCC goes to extra lengths here to be a bit more robust.
Index: include/clang/Driver/Compilation.h
===================================================================
--- include/clang/Driver/Compilation.h
+++ include/clang/Driver/Compilation.h
@@ -122,6 +122,9 @@
/// Whether an error during the parsing of the input args.
bool ContainsError;
+ /// Whether the clang-offload-bundler can be skipped.
+ bool skipOffloadBundler = false;
+
public:
Compilation(const Driver &D, const ToolChain &DefaultToolChain,
llvm::opt::InputArgList *Args,
@@ -301,6 +304,16 @@
/// of three. The inferior process's stdin(0), stdout(1), and stderr(2) will
/// be redirected to the corresponding paths, if provided (not llvm::None).
void Redirect(ArrayRef<Optional<StringRef>> Redirects);
+
+ /// Set whether the compilation can avoid calling the clang-offload-bundler
+ /// for object file types.
+ ///
+ /// \param skipBundler - bool value set once by the driver.
+ void setSkipOffloadBundler(bool skipBundler);
+
+ /// Returns true when calls to the clang-offload-bundler are not required
+ /// for object types.
+ bool canSkipOffloadBundler() const;
};
} // namespace driver
Index: include/clang/Driver/Action.h
===================================================================
--- include/clang/Driver/Action.h
+++ include/clang/Driver/Action.h
@@ -71,9 +71,10 @@
VerifyPCHJobClass,
OffloadBundlingJobClass,
OffloadUnbundlingJobClass,
+ PartialLinkerJobClass,
JobClassFirst = PreprocessJobClass,
- JobClassLast = OffloadUnbundlingJobClass
+ JobClassLast = PartialLinkerJobClass
};
// The offloading kind determines if this action is binded to a particular
@@ -589,6 +590,18 @@
}
};
+/// Job action that combines several inputs into one output through a
+/// partial link step. Its kind is PartialLinkerJobClass.
+class PartialLinkerJobAction : public JobAction {
+ void anchor() override;
+
+public:
+ // Partial linking does not change the type of output: the action takes
+ // the type of the first element of Inputs.
+ PartialLinkerJobAction(ActionList &Inputs);
+
+ // LLVM-style RTTI support (isa<>/dyn_cast<>): matches on the action kind.
+ static bool classof(const Action *A) {
+ return A->getKind() == PartialLinkerJobClass;
+ }
+};
+
+
} // namespace driver
} // namespace clang
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits