https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/147823
>From 7f9ea74bfb79084bc4000b22acda68e9e9c996b9 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Wed, 9 Jul 2025 15:32:57 -0500 Subject: [PATCH] [Clang] Extract offloading code from static libs with 'offload-arch=' Summary: The semantics of static libraries only extract stuff that's used. We somewhat extend this behavior with the linker wrapper only doing this to fat binaries that match any found architectures. However, this has some unfortunate effects when the user uses static libraries. This is somewhat of a hack, but we now assume that if the user specified `--offload-arch=` on the link job, they *definitely* want that architecture to be used if it exists. This patch just forces extraction of those libraries which resolves an issue observed with some customers. The old behavior will still be present if the user does `--offload-link` with no offloading architecture present, and for the vast, vast majority of cases this will change nothing. Fixes: https://github.com/llvm/llvm-project/issues/147788 --- clang/lib/Driver/ToolChains/Clang.cpp | 9 ++++++++- clang/test/Driver/openmp-offload-gpu.c | 9 +++++++++ .../tools/clang-linker-wrapper/ClangLinkerWrapper.cpp | 10 +++++++--- clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td | 4 ++++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b76163afc8aa4..8675e1bab315c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9152,7 +9152,9 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, // specific architecture via -Xarch_<cpu> will not be forwarded. ArgStringList CompilerArgs; ArgStringList LinkerArgs; - for (Arg *A : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) { + const DerivedArgList &ToolChainArgs = + C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind); + for (Arg *A : ToolChainArgs) { if (A->getOption().matches(OPT_Zlinker_input)) LinkerArgs.emplace_back(A->getValue()); else if (ShouldForward(CompilerOptions, A)) @@ -9161,6 +9163,11 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, A->render(Args, LinkerArgs); } + // If the user explicitly requested it via `--offload-arch` we should + // extract it from any static libraries if present. + for (StringRef Arg : ToolChainArgs.getAllArgValues(OPT_offload_arch_EQ)) + CmdArgs.emplace_back(Args.MakeArgString("--should-extract=" + Arg)); + // If this is OpenMP the device linker will need `-lompdevice`. if (Kind == Action::OFK_OpenMP && !Args.hasArg(OPT_no_offloadlib) && (TC->getTriple().isAMDGPU() || TC->getTriple().isNVPTX())) diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index 62e7c9588ce66..77f4cfb5f3a43 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -395,3 +395,12 @@ // RUN: --offload-arch=sm_52 -foffload-lto=thin -nogpulib -nogpuinc %s 2>&1 \ // RUN: | FileCheck --check-prefix=THINLTO-SM52 %s // THINLTO-SM52: --device-compiler=nvptx64-nvidia-cuda=-flto=thin + +// +// Check the requested architecture is passed if provided. +// +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ +// RUN: --offload-arch=gfx906 -foffload-lto=thin -nogpulib -nogpuinc %s 2>&1 \ +// RUN: | FileCheck --check-prefix=SHOULD-EXTRACT %s +// +// SHOULD-EXTRACT: clang-linker-wrapper{{.*}}"--should-extract=gfx906" diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 0f1fa8b329fd6..9d34b62da20f5 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1302,6 +1302,9 @@ getDeviceInput(const ArgList &Args) { // after every regular input file so that libraries may be included out of // order. This follows 'ld.lld' semantics which are more lenient. bool Extracted = true; + llvm::DenseSet<StringRef> ShouldExtract; + for (auto &Arg : Args.getAllArgValues(OPT_should_extract)) + ShouldExtract.insert(Arg); while (Extracted) { Extracted = false; for (OffloadFile &Binary : ArchiveFilesToExtract) { @@ -1315,8 +1318,9 @@ getDeviceInput(const ArgList &Args) { CompatibleTargets.emplace_back(ID); for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { - // Only extract an if we have an an object matching this target. - if (!InputFiles.count(ID)) + // Only extract an if we have an an object matching this target or it + // was specifically requested. + if (!InputFiles.count(ID) && !ShouldExtract.contains(ID.second)) continue; Expected<bool> ExtractOrErr = @@ -1330,7 +1334,7 @@ getDeviceInput(const ArgList &Args) { // Skip including the file if it is an archive that does not resolve // any symbols. - if (!Extracted) + if (!Extracted && !ShouldExtract.contains(ID.second)) continue; // If another target needs this binary it must be copied instead. diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index 17fb9db35fe39..fa73e02fd5178 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -59,6 +59,10 @@ def override_image : Joined<["--"], "override-image=">, Flags<[WrapperOnlyOption]>, MetaVarName<"<kind=file>">, HelpText<"Uses the provided file as if it were the output of the device link step">; +def should_extract : CommaJoined<["--"], "should-extract=">, + Flags<[WrapperOnlyOption]>, MetaVarName<"<kind=file>">, + HelpText<"Set of device architectures we should always extract if found.">; + // Flags passed to the device linker. def arch_EQ : Joined<["--"], "arch=">, Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"<arch>">, _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits