jhuber6 created this revision. jhuber6 added reviewers: jdoerfert, JonChesterfield, ronlieb, arsenm, yaxunl, tianshilei1992, ye-luo. Herald added subscribers: kosarev, kerbowa, guansong, t-tye, tpr, dstuttard, jvesely, kzhuravl. Herald added a project: All. jhuber6 requested review of this revision. Herald added subscribers: cfe-commits, sstefan1, MaskRay, wdng. Herald added a project: clang.
Previously, we linked in the ROCm device libraries, which provide math and other utility functions, late. This is not strictly correct as this library contains several flags that are only set per-TU, such as fast math or denormalization. This patch changes this to pass the bitcode libraries per-TU using the same method we use for the CUDA libraries. This has the advantage that we correctly propagate attributes, making this implementation more correct. Additionally, many annoying unused functions were not being fully removed during LTO. This led to erroneous warning messages and remarks on unused functions. I am not sure if not finding these libraries should be a hard error. Let me know if it should be demoted to a warning saying that some device utilities will not work without them. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D133726 Files: clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/lib/Driver/ToolChains/AMDGPUOpenMP.h clang/lib/Driver/ToolChains/Clang.cpp clang/test/Driver/amdgpu-openmp-toolchain.c
Index: clang/test/Driver/amdgpu-openmp-toolchain.c =================================================================== --- clang/test/Driver/amdgpu-openmp-toolchain.c +++ clang/test/Driver/amdgpu-openmp-toolchain.c @@ -49,5 +49,7 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR // CHECK-EMIT-LLVM-IR: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm" -// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NEW -// CHECK-LIB-DEVICE-NEW: {{.*}}clang-linker-wrapper{{.*}}--bitcode-library=openmp-amdgcn-amd-amdhsa-gfx803={{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc" +// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 \ +// RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NEW +// CHECK-LIB-DEVICE-NEW: "-cc1" {{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc" Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -8367,7 +8367,6 @@ const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); const llvm::Triple TheTriple = getToolChain().getTriple(); - auto OpenMPTCRange = 
C.getOffloadToolChains<Action::OFK_OpenMP>(); ArgStringList CmdArgs; // Pass the CUDA path to the linker wrapper tool. @@ -8385,30 +8384,6 @@ } } - // Get the AMDGPU math libraries. - // FIXME: This method is bad, remove once AMDGPU has a proper math library - // (see AMDGCN::OpenMPLinker::constructLLVMLinkCommand). - for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) { - const ToolChain *TC = I.second; - - if (!TC->getTriple().isAMDGPU() || Args.hasArg(options::OPT_nogpulib)) - continue; - - const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP); - StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ); - const toolchains::ROCMToolChain RocmTC(TC->getDriver(), TC->getTriple(), - TCArgs); - - SmallVector<std::string, 12> BCLibs = - RocmTC.getCommonDeviceLibNames(TCArgs, Arch.str()); - - for (StringRef LibName : BCLibs) - CmdArgs.push_back(Args.MakeArgString( - "--bitcode-library=" + - Action::GetOffloadKindName(Action::OFK_OpenMP) + "-" + - TC->getTripleString() + "-" + Arch + "=" + LibName)); - } - if (D.isUsingLTO(/* IsOffload */ true)) { // Pass in the optimization level to use for LTO. 
if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) { Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.h =================================================================== --- clang/lib/Driver/ToolChains/AMDGPUOpenMP.h +++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.h @@ -54,6 +54,9 @@ computeMSVCVersion(const Driver *D, const llvm::opt::ArgList &Args) const override; + llvm::SmallVector<BitCodeLibraryInfo, 12> + getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override; + const ToolChain &HostTC; }; Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -75,6 +75,12 @@ if (DriverArgs.hasArg(options::OPT_nogpulib)) return; + for (auto BCFile : getHIPDeviceLibs(DriverArgs)) { + CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" + : "-mlink-bitcode-file"); + CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); + } + // Link the bitcode library late if we're using device LTO. 
if (getDriver().isUsingLTO(/* IsOffload */ true)) return; @@ -158,3 +164,23 @@ const ArgList &Args) const { return HostTC.computeMSVCVersion(D, Args); } + +llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> +AMDGPUOpenMPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &Args) const { + if (Args.hasArg(options::OPT_nogpulib)) + return {}; + + if (!RocmInstallation.hasDeviceLibrary()) { + getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; + return {}; + } + + StringRef GpuArch = getGPUArch(Args); + + SmallVector<BitCodeLibraryInfo, 12> BCLibs; + for (auto BCLib : getCommonDeviceLibNames(Args, GpuArch.str(), + /*IsOpenMP=*/true)) + BCLibs.emplace_back(BCLib); + + return BCLibs; +} Index: clang/lib/Driver/ToolChains/AMDGPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -714,8 +714,13 @@ StringRef AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { - return getProcessorFromTargetID( - getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ)); + if (DriverArgs.hasArg(options::OPT_mcpu_EQ)) + return getProcessorFromTargetID( + getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ)); + if (DriverArgs.hasArg(options::OPT_march_EQ)) + return getProcessorFromTargetID( + getTriple(), DriverArgs.getLastArgValue(options::OPT_march_EQ)); + return ""; } AMDGPUToolChain::ParsedTargetIDType
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits