jhuber6 created this revision. jhuber6 added reviewers: jdoerfert, tianshilei1992, JonChesterfield, ABataev. Herald added subscribers: guansong, inglorion, yaxunl. Herald added a project: All. jhuber6 requested review of this revision. Herald added subscribers: cfe-commits, sstefan1, MaskRay. Herald added a project: clang.
The previous patches allowed us to create a static library containing all the device code. This patch uses that library to perform the device runtime linking late when performing LTO. This in addition to simplifying the libraries, allows us to transparently handle the runtime library as-needed without needing Clang to manually pass the necessary library in the linker wrapper job. Depends on D125315 <https://reviews.llvm.org/D125315> Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D125333 Files: clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/CommonArgs.cpp clang/test/Driver/openmp-offload-gpu-new.c Index: clang/test/Driver/openmp-offload-gpu-new.c =================================================================== --- clang/test/Driver/openmp-offload-gpu-new.c +++ clang/test/Driver/openmp-offload-gpu-new.c @@ -89,3 +89,8 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \ // RUN: --offload-device-only -E -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY-PP // CHECK-DEVICE-ONLY-PP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.*]]"], output: "-" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \ +// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-LIBRARY %s + +// CHECK-LTO-LIBRARY: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl Index: clang/lib/Driver/ToolChains/CommonArgs.cpp =================================================================== --- clang/lib/Driver/ToolChains/CommonArgs.cpp +++ clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -736,6 +736,9 @@ if (IsOffloadingHost) CmdArgs.push_back("-lomptarget"); + if (IsOffloadingHost && TC.getDriver().isUsingLTO(/* IsOffload */ true)) + CmdArgs.push_back("-lomptarget.devicertl"); + addArchSpecificRPath(TC, Args, CmdArgs); if (RTKind == Driver::OMPRT_OMP) Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -8301,28 +8301,6 @@ "=" + *(FeatureIt + 1))); } - // Pass in the bitcode library to be linked during LTO. - for (auto &I : - llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) { - const ToolChain *TC = I.second; - if (!(TC->getTriple().isNVPTX() || TC->getTriple().isAMDGPU())) - continue; - - const Driver &TCDriver = TC->getDriver(); - const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); - StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ); - - ArgStringList BitcodeLibrary; - addOpenMPDeviceRTL(TCDriver, TCArgs, BitcodeLibrary, Arch, - TC->getTriple()); - - if (!BitcodeLibrary.empty()) - CmdArgs.push_back(Args.MakeArgString( - "-target-library=" + - Action::GetOffloadKindName(Action::OFK_OpenMP) + "-" + - TC->getTripleString() + "-" + Arch + "=" + BitcodeLibrary.back())); - } - // Pass in the optimization level to use for LTO. if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) { StringRef OOpt;
Index: clang/test/Driver/openmp-offload-gpu-new.c =================================================================== --- clang/test/Driver/openmp-offload-gpu-new.c +++ clang/test/Driver/openmp-offload-gpu-new.c @@ -89,3 +89,8 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \ // RUN: --offload-device-only -E -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY-PP // CHECK-DEVICE-ONLY-PP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.*]]"], output: "-" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \ +// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-LIBRARY %s + +// CHECK-LTO-LIBRARY: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl Index: clang/lib/Driver/ToolChains/CommonArgs.cpp =================================================================== --- clang/lib/Driver/ToolChains/CommonArgs.cpp +++ clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -736,6 +736,9 @@ if (IsOffloadingHost) CmdArgs.push_back("-lomptarget"); + if (IsOffloadingHost && TC.getDriver().isUsingLTO(/* IsOffload */ true)) + CmdArgs.push_back("-lomptarget.devicertl"); + addArchSpecificRPath(TC, Args, CmdArgs); if (RTKind == Driver::OMPRT_OMP) Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -8301,28 +8301,6 @@ "=" + *(FeatureIt + 1))); } - // Pass in the bitcode library to be linked during LTO. - for (auto &I : - llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) { - const ToolChain *TC = I.second; - if (!(TC->getTriple().isNVPTX() || TC->getTriple().isAMDGPU())) - continue; - - const Driver &TCDriver = TC->getDriver(); - const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); - StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ); - - ArgStringList BitcodeLibrary; - addOpenMPDeviceRTL(TCDriver, TCArgs, BitcodeLibrary, Arch, - TC->getTriple()); - - if (!BitcodeLibrary.empty()) - CmdArgs.push_back(Args.MakeArgString( - "-target-library=" + - Action::GetOffloadKindName(Action::OFK_OpenMP) + "-" + - TC->getTripleString() + "-" + Arch + "=" + BitcodeLibrary.back())); - } - // Pass in the optimization level to use for LTO. if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) { StringRef OOpt;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits