jhuber6 created this revision. jhuber6 added reviewers: jdoerfert, ronlieb, gregrodgers, JonChesterfield. Herald added subscribers: kerbowa, guansong, inglorion, yaxunl, nhaehnle, jvesely. jhuber6 requested review of this revision. Herald added subscribers: cfe-commits, sstefan1. Herald added a project: clang.
This patch adds support for linking the OpenMP device bitcode library late when doing LTO. This simply passes it in as an additional device file when doing the final device linking phase with LTO. This has the advantage that we don't link it multiple times, and the device references do not get inlined, which would otherwise prevent us from doing needed OpenMP optimizations when we have visibility of the whole module. Depends on D116975 <https://reviews.llvm.org/D116975> Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D117048 Files: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/Cuda.cpp clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp =================================================================== --- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -71,6 +71,11 @@ cl::init("O0"), cl::cat(ClangLinkerWrapperCategory)); +static cl::opt<std::string> + BitcodeLibrary("target-library", + cl::desc("Path for the target bitcode library"), + cl::cat(ClangLinkerWrapperCategory)); + // Do not parse linker options. static cl::list<std::string> HostLinkerArgs(cl::Sink, cl::desc("<options to be passed to linker>...")); @@ -976,6 +981,14 @@ } } + // Add the device bitcode library to the device files if it was passed in. + if (!BitcodeLibrary.empty()) { + auto DeviceAndPath = StringRef(BitcodeLibrary).split('='); + auto TripleAndArch = DeviceAndPath.first.rsplit('-'); + DeviceFiles.emplace_back(TripleAndArch.first, TripleAndArch.second, + DeviceAndPath.second); + } + // Link the device images extracted from the linker input. 
SmallVector<std::string, 16> LinkedImages; if (Error Err = linkDeviceFiles(DeviceFiles, LinkerArgs, LinkedImages)) Index: clang/lib/Driver/ToolChains/Cuda.cpp =================================================================== --- clang/lib/Driver/ToolChains/Cuda.cpp +++ clang/lib/Driver/ToolChains/Cuda.cpp @@ -744,6 +744,10 @@ return; } + // Link the bitcode library late if we're using device LTO. + if (getDriver().isUsingLTO(/* IsOffload */ true)) + return; + std::string BitcodeSuffix; if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, options::OPT_fno_openmp_target_new_runtime, true)) Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -8147,6 +8147,34 @@ "-target-feature=" + TC->getTripleString() + "=" + *(FeatureIt + 1))); } + // Pass in the bitcode library to be linked during LTO. + for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; + ++TI) { + const ToolChain *TC = TI->second; + const Driver &D = TC->getDriver(); + const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); + StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ); + + std::string BitcodeSuffix; + if (TCArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, + options::OPT_fno_openmp_target_new_runtime, true)) + BitcodeSuffix += "new-"; + if (TC->getTriple().isNVPTX()) + BitcodeSuffix += "nvptx-"; + else if (TC->getTriple().isAMDGPU()) + BitcodeSuffix += "amdgpu-"; + BitcodeSuffix += Arch; + + ArgStringList BitcodeLibrary; + addOpenMPDeviceRTL(D, TCArgs, BitcodeLibrary, BitcodeSuffix, + TC->getTriple()); + + if (!BitcodeLibrary.empty()) + CmdArgs.push_back( + Args.MakeArgString("-target-library=" + TC->getTripleString() + + "-" + Arch + "=" + BitcodeLibrary.back())); + } + // Pass in the optimization level to use for LTO. 
if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) { StringRef OOpt; Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -264,6 +264,10 @@ if (DriverArgs.hasArg(options::OPT_nogpulib)) return; + // Link the bitcode library late if we're using device LTO. + if (getDriver().isUsingLTO(/* IsOffload */ true)) + return; + std::string BitcodeSuffix; if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, options::OPT_fno_openmp_target_new_runtime, true))
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp =================================================================== --- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -71,6 +71,11 @@ cl::init("O0"), cl::cat(ClangLinkerWrapperCategory)); +static cl::opt<std::string> + BitcodeLibrary("target-library", + cl::desc("Path for the target bitcode library"), + cl::cat(ClangLinkerWrapperCategory)); + // Do not parse linker options. static cl::list<std::string> HostLinkerArgs(cl::Sink, cl::desc("<options to be passed to linker>...")); @@ -976,6 +981,14 @@ } } + // Add the device bitcode library to the device files if it was passed in. + if (!BitcodeLibrary.empty()) { + auto DeviceAndPath = StringRef(BitcodeLibrary).split('='); + auto TripleAndArch = DeviceAndPath.first.rsplit('-'); + DeviceFiles.emplace_back(TripleAndArch.first, TripleAndArch.second, + DeviceAndPath.second); + } + // Link the device images extracted from the linker input. SmallVector<std::string, 16> LinkedImages; if (Error Err = linkDeviceFiles(DeviceFiles, LinkerArgs, LinkedImages)) Index: clang/lib/Driver/ToolChains/Cuda.cpp =================================================================== --- clang/lib/Driver/ToolChains/Cuda.cpp +++ clang/lib/Driver/ToolChains/Cuda.cpp @@ -744,6 +744,10 @@ return; } + // Link the bitcode library late if we're using device LTO. + if (getDriver().isUsingLTO(/* IsOffload */ true)) + return; + std::string BitcodeSuffix; if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, options::OPT_fno_openmp_target_new_runtime, true)) Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -8147,6 +8147,34 @@ "-target-feature=" + TC->getTripleString() + "=" + *(FeatureIt + 1))); } + // Pass in the bitcode library to be linked during LTO. 
+ for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; + ++TI) { + const ToolChain *TC = TI->second; + const Driver &D = TC->getDriver(); + const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); + StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ); + + std::string BitcodeSuffix; + if (TCArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, + options::OPT_fno_openmp_target_new_runtime, true)) + BitcodeSuffix += "new-"; + if (TC->getTriple().isNVPTX()) + BitcodeSuffix += "nvptx-"; + else if (TC->getTriple().isAMDGPU()) + BitcodeSuffix += "amdgpu-"; + BitcodeSuffix += Arch; + + ArgStringList BitcodeLibrary; + addOpenMPDeviceRTL(D, TCArgs, BitcodeLibrary, BitcodeSuffix, + TC->getTriple()); + + if (!BitcodeLibrary.empty()) + CmdArgs.push_back( + Args.MakeArgString("-target-library=" + TC->getTripleString() + + "-" + Arch + "=" + BitcodeLibrary.back())); + } + // Pass in the optimization level to use for LTO. if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) { StringRef OOpt; Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -264,6 +264,10 @@ if (DriverArgs.hasArg(options::OPT_nogpulib)) return; + // Link the bitcode library late if we're using device LTO. + if (getDriver().isUsingLTO(/* IsOffload */ true)) + return; + std::string BitcodeSuffix; if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, options::OPT_fno_openmp_target_new_runtime, true))
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits