https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/196586
>From 587ee4a706921e713618ad2c5dff2b8c00331b46 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Fri, 8 May 2026 11:19:48 +0100 Subject: [PATCH] clang/AMDGPU: Pass BoundArch through device libs handling Pre-work to consolidate target identification for future target option bug fixes. Also requires updating flang to match recent clang changes. Co-authored-by: Claude Sonnet 4 <[email protected]> --- clang/include/clang/Driver/ToolChain.h | 2 +- clang/lib/Driver/ToolChain.cpp | 2 +- clang/lib/Driver/ToolChains/AMDGPU.cpp | 14 ++++++++---- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 13 ++++------- clang/lib/Driver/ToolChains/AMDGPUOpenMP.h | 2 +- clang/lib/Driver/ToolChains/Flang.cpp | 24 ++++++++++++-------- clang/lib/Driver/ToolChains/Flang.h | 16 ++++++++++--- clang/lib/Driver/ToolChains/HIPAMD.cpp | 15 +++++++----- clang/lib/Driver/ToolChains/HIPAMD.h | 2 +- clang/lib/Driver/ToolChains/HIPSPV.cpp | 4 ++-- clang/lib/Driver/ToolChains/HIPSPV.h | 2 +- 11 files changed, 57 insertions(+), 39 deletions(-) diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 453af0783b445..8953c299268df 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -868,7 +868,7 @@ class ToolChain { /// Get paths for device libraries. virtual llvm::SmallVector<BitCodeLibraryInfo, 12> - getDeviceLibs(const llvm::opt::ArgList &Args, + getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, const Action::OffloadKind DeviceOffloadingKind) const; /// Add the system specific libraries for the active offload kinds. diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 1123a41524182..fc4bcfc2fe865 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1872,7 +1872,7 @@ void ToolChain::addSYCLIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const {} llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> -ToolChain::getDeviceLibs(const ArgList &DriverArgs, +ToolChain::getDeviceLibs(const ArgList &DriverArgs, StringRef BoundArch, const Action::OffloadKind DeviceOffloadingKind) const { return {}; } diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 4320d1480be61..b51ad4e66edb7 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -978,11 +978,16 @@ void ROCMToolChain::addClangTargetOptions( if (TT.getEnvironment() == llvm::Triple::LLVM) return; - AMDGPUToolChain::ParsedTargetIDType TargetID = getParsedTargetID(DriverArgs); - StringRef GpuArch = - TargetID.OptionalGPUArch ? *TargetID.OptionalGPUArch : StringRef(); + // Get the device name and canonicalize it. For offload compilation, + // BoundArch contains the full target ID. For non-offload (OpenCL), + // fall back to -mcpu. + StringRef TargetID = BoundArch.empty() + ? DriverArgs.getLastArgValue(options::OPT_mcpu_EQ) + : BoundArch; + StringRef GpuArch = getProcessorFromTargetID(getTriple(), TargetID); StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(GpuArch); + auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( getAMDGPUCodeObjectVersion(getDriver(), DriverArgs)); if (!RocmInstallation->checkCommonBitcodeLibs(GpuArch, LibDeviceFile, ABIVer)) @@ -995,8 +1000,7 @@ void ROCMToolChain::addClangTargetOptions( // Add the generic set of libraries. BCLibs.append(RocmInstallation->getCommonBitcodeLibs( DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind, - getSanitizerArgs(DriverArgs, TargetID.OptionalTargetID.value_or(""), - DeviceOffloadingKind) + getSanitizerArgs(DriverArgs, TargetID, DeviceOffloadingKind) .needsAsanRt())); for (auto [BCFile, Internalize] : BCLibs) { diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index 1d3568321438f..e7a169a374464 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -43,7 +43,8 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( true)) return; - for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) { + for (auto BCFile : + getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind)) { CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" : "-mlink-bitcode-file"); CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); @@ -122,19 +123,15 @@ AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D, llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> AMDGPUOpenMPToolChain::getDeviceLibs( - const llvm::opt::ArgList &Args, + const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, const Action::OffloadKind DeviceOffloadingKind) const { if (!Args.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true)) return {}; - AMDGPUToolChain::ParsedTargetIDType TargetID = getParsedTargetID(Args); - if (!TargetID.OptionalTargetID) - return {}; - + StringRef GpuArch = getProcessorFromTargetID(getTriple(), BoundArch); SmallVector<BitCodeLibraryInfo, 12> BCLibs; for (auto BCLib : - getCommonDeviceLibNames(Args, *TargetID.OptionalTargetID, - *TargetID.OptionalGPUArch, DeviceOffloadingKind)) + getCommonDeviceLibNames(Args, BoundArch, GpuArch, DeviceOffloadingKind)) BCLibs.emplace_back(BCLib); return BCLibs; diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h index b1bcb08899c46..7e212f15a9ebc 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h @@ -58,7 +58,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final const llvm::opt::ArgList &Args) const override; llvm::SmallVector<BitCodeLibraryInfo, 12> - getDeviceLibs(const llvm::opt::ArgList &Args, + getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, const Action::OffloadKind DeviceOffloadKind) const override; /// OpenMP uses LTO by default to link device bitcode. diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index c713b7904b22c..fcbbcbcc7018b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -522,8 +522,9 @@ static void processVSRuntimeLibrary(const ToolChain &TC, const ArgList &Args, } } -void Flang::AddAMDGPUTargetArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { +void Flang::AddAMDGPUTargetArgs(const ArgList &Args, ArgStringList &CmdArgs, + StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const { if (Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) { StringRef Val = A->getValue(); CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val)); @@ -533,11 +534,12 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args, } const ToolChain &TC = getToolChain(); - TC.addClangTargetOptions(Args, CmdArgs, "", Action::OffloadKind::OFK_OpenMP); + TC.addClangTargetOptions(Args, CmdArgs, BoundArch, DeviceOffloadKind); } -void Flang::AddNVPTXTargetArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { +void Flang::AddNVPTXTargetArgs(const ArgList &Args, ArgStringList &CmdArgs, + StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const { // we cannot use addClangTargetOptions, as it appends unsupported args for // flang: -fcuda-is-device, -fno-threadsafe-statics, // -fcuda-allow-variadic-functions and -target-sdk-version Instead we manually @@ -572,8 +574,9 @@ void Flang::AddNVPTXTargetArgs(const ArgList &Args, CmdArgs.push_back(Args.MakeArgString(LibDeviceFile)); } -void Flang::addTargetOptions(const ArgList &Args, - ArgStringList &CmdArgs) const { +void Flang::addTargetOptions(const ArgList &Args, ArgStringList &CmdArgs, + StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const { const ToolChain &TC = getToolChain(); const llvm::Triple &Triple = TC.getEffectiveTriple(); const Driver &D = TC.getDriver(); @@ -599,11 +602,11 @@ void Flang::addTargetOptions(const ArgList &Args, case llvm::Triple::r600: case llvm::Triple::amdgcn: getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false); - AddAMDGPUTargetArgs(Args, CmdArgs); + AddAMDGPUTargetArgs(Args, CmdArgs, BoundArch, DeviceOffloadKind); break; case llvm::Triple::nvptx: case llvm::Triple::nvptx64: - AddNVPTXTargetArgs(Args, CmdArgs); + AddNVPTXTargetArgs(Args, CmdArgs, BoundArch, DeviceOffloadKind); break; case llvm::Triple::riscv64: getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false); @@ -1111,7 +1114,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, addFloatingPointOptions(D, Args, CmdArgs); // Add target args, features, etc. - addTargetOptions(Args, CmdArgs); + addTargetOptions(Args, CmdArgs, JA.getOffloadingArch(), + JA.getOffloadingDeviceKind()); if (!TC.useIntegratedAs()) CmdArgs.push_back("-no-integrated-as"); diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h index 62d2c6bb2a093..f08baa0fd5c12 100644 --- a/clang/lib/Driver/ToolChains/Flang.h +++ b/clang/lib/Driver/ToolChains/Flang.h @@ -61,8 +61,12 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool { /// /// \param [in] Args The list of input driver arguments /// \param [out] CmdArgs The list of output command arguments + /// \param [in] BoundArch The bound architecture for offload compilation + /// \param [in] DeviceOffloadKind The offload kind void addTargetOptions(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) const; + llvm::opt::ArgStringList &CmdArgs, + llvm::StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const; /// Add specific options for AArch64 target. /// @@ -75,11 +79,17 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool { /// /// \param [in] Args The list of input driver arguments /// \param [out] CmdArgs The list of output command arguments + /// \param [in] BoundArch The bound architecture for offload compilation + /// \param [in] DeviceOffloadKind The offload kind void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) const; + llvm::opt::ArgStringList &CmdArgs, + llvm::StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const; void AddNVPTXTargetArgs(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) const; + llvm::opt::ArgStringList &CmdArgs, + llvm::StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const; /// Add specific options for LoongArch64 target. /// diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 26ce048c4c69d..4adb84f353d25 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -282,7 +282,8 @@ void HIPAMDToolChain::addClangTargetOptions( return; // No DeviceLibs for SPIR-V. } - for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) { + for (auto BCFile : + getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind)) { CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" : "-mlink-bitcode-file"); CC1Args.push_back(DriverArgs.MakeArgStringRef(BCFile.Path)); @@ -371,7 +372,10 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D, llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs, + llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadingKind) const { + assert(!BoundArch.empty() && "Must have an explicit GPU arch."); + llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; const llvm::Triple &TT = getEffectiveTriple(); @@ -380,8 +384,8 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs, TT.getEnvironment() == llvm::Triple::LLVM) return {}; - AMDGPUToolChain::ParsedTargetIDType TargetID = getParsedTargetID(DriverArgs); - if (!TargetID.OptionalTargetID || TargetID.OptionalTargetID == "amdgcnspirv") + StringRef GpuArch = getProcessorFromTargetID(getTriple(), BoundArch); + if (GpuArch == "amdgcnspirv") return {}; ArgStringList LibraryPaths; @@ -418,9 +422,8 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs, } // Add common device libraries like ocml etc. - for (auto N : getCommonDeviceLibNames( - DriverArgs, *TargetID.OptionalTargetID, *TargetID.OptionalGPUArch, - DeviceOffloadingKind)) + for (auto N : getCommonDeviceLibNames(DriverArgs, BoundArch, GpuArch, + DeviceOffloadingKind)) BCLibs.emplace_back(N); // Add instrument lib. diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h index e0b72eda0dd6c..8277119bf9348 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.h +++ b/clang/lib/Driver/ToolChains/HIPAMD.h @@ -83,7 +83,7 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain { void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector<BitCodeLibraryInfo, 12> - getDeviceLibs(const llvm::opt::ArgList &Args, + getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override; VersionTuple diff --git a/clang/lib/Driver/ToolChains/HIPSPV.cpp b/clang/lib/Driver/ToolChains/HIPSPV.cpp index a4177a8a6665d..0d93e55137889 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.cpp +++ b/clang/lib/Driver/ToolChains/HIPSPV.cpp @@ -181,7 +181,7 @@ void HIPSPVToolChain::addClangTargetOptions( {"-fvisibility=hidden", "-fapply-global-visibility-to-externs"}); for (const BitCodeLibraryInfo &BCFile : - getDeviceLibs(DriverArgs, DeviceOffloadingKind)) + getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind)) CC1Args.append( {"-mlink-builtin-bitcode", DriverArgs.MakeArgString(BCFile.Path)}); } @@ -243,7 +243,7 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> HIPSPVToolChain::getDeviceLibs( - const llvm::opt::ArgList &DriverArgs, + const llvm::opt::ArgList &DriverArgs, llvm::StringRef BoundArch, const Action::OffloadKind DeviceOffloadingKind) const { llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs; if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h b/clang/lib/Driver/ToolChains/HIPSPV.h index f7f50e1f36688..8e2fd91a4b7ac 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.h +++ b/clang/lib/Driver/ToolChains/HIPSPV.h @@ -74,7 +74,7 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain { void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector<BitCodeLibraryInfo, 12> - getDeviceLibs(const llvm::opt::ArgList &Args, + getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, const Action::OffloadKind DeviceOffloadKind) const override; SanitizerMask _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
