jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, JonChesterfield, ronlieb, arsenm, yaxunl, 
tianshilei1992, ye-luo.
Herald added subscribers: kosarev, kerbowa, guansong, t-tye, tpr, dstuttard, 
jvesely, kzhuravl.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, MaskRay, wdng.
Herald added a project: clang.

Previously, we linked in the ROCm device libraries which provide math
and other utility functions late. This is not stricly correct as this
library contains several flags that are only set per-TU, such as fast
math or denormalization. This patch changes this to pass the bitcode
libraries per-TU using the same method we use for the CUDA libraries.
This has the advantage that we correctly propagate attributes making
this implementation more correct. Additionally, many annoying unused
functions were not being fully removed during LTO. This lead to
erroneous warning messages and remarks on unused functions.

I am not sure if not finding these libraries should be a hard error. let
me know if it should be demoted to a warning saying that some device
utilities will not work without them.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D133726

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/amdgpu-openmp-toolchain.c

Index: clang/test/Driver/amdgpu-openmp-toolchain.c
===================================================================
--- clang/test/Driver/amdgpu-openmp-toolchain.c
+++ clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -49,5 +49,7 @@
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
 // CHECK-EMIT-LLVM-IR: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
 
-// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NEW
-// CHECK-LIB-DEVICE-NEW: {{.*}}clang-linker-wrapper{{.*}}--bitcode-library=openmp-amdgcn-amd-amdhsa-gfx803={{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
+// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 \
+// RUN:   --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s  2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NEW
+// CHECK-LIB-DEVICE-NEW: "-cc1" {{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -8367,7 +8367,6 @@
                                  const char *LinkingOutput) const {
   const Driver &D = getToolChain().getDriver();
   const llvm::Triple TheTriple = getToolChain().getTriple();
-  auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
@@ -8385,30 +8384,6 @@
     }
   }
 
-  // Get the AMDGPU math libraries.
-  // FIXME: This method is bad, remove once AMDGPU has a proper math library
-  // (see AMDGCN::OpenMPLinker::constructLLVMLinkCommand).
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-    const ToolChain *TC = I.second;
-
-    if (!TC->getTriple().isAMDGPU() || Args.hasArg(options::OPT_nogpulib))
-      continue;
-
-    const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
-    StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ);
-    const toolchains::ROCMToolChain RocmTC(TC->getDriver(), TC->getTriple(),
-                                           TCArgs);
-
-    SmallVector<std::string, 12> BCLibs =
-        RocmTC.getCommonDeviceLibNames(TCArgs, Arch.str());
-
-    for (StringRef LibName : BCLibs)
-      CmdArgs.push_back(Args.MakeArgString(
-          "--bitcode-library=" +
-          Action::GetOffloadKindName(Action::OFK_OpenMP) + "-" +
-          TC->getTripleString() + "-" + Arch + "=" + LibName));
-  }
-
   if (D.isUsingLTO(/* IsOffload */ true)) {
     // Pass in the optimization level to use for LTO.
     if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
@@ -54,6 +54,9 @@
   computeMSVCVersion(const Driver *D,
                      const llvm::opt::ArgList &Args) const override;
 
+  llvm::SmallVector<BitCodeLibraryInfo, 12>
+  getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override;
+
   const ToolChain &HostTC;
 };
 
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -75,6 +75,12 @@
   if (DriverArgs.hasArg(options::OPT_nogpulib))
     return;
 
+  for (auto BCFile : getHIPDeviceLibs(DriverArgs)) {
+    CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
+                                               : "-mlink-bitcode-file");
+    CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
+  }
+
   // Link the bitcode library late if we're using device LTO.
   if (getDriver().isUsingLTO(/* IsOffload */ true))
     return;
@@ -158,3 +164,23 @@
                                           const ArgList &Args) const {
   return HostTC.computeMSVCVersion(D, Args);
 }
+
+llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
+AMDGPUOpenMPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &Args) const {
+  if (Args.hasArg(options::OPT_nogpulib))
+    return {};
+
+  if (!RocmInstallation.hasDeviceLibrary()) {
+    getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
+    return {};
+  }
+
+  StringRef GpuArch = getGPUArch(Args);
+
+  SmallVector<BitCodeLibraryInfo, 12> BCLibs;
+  for (auto BCLib : getCommonDeviceLibNames(Args, GpuArch.str(),
+                                            /*IsOpenMP=*/true))
+    BCLibs.emplace_back(BCLib);
+
+  return BCLibs;
+}
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -714,8 +714,13 @@
 
 StringRef
 AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
-  return getProcessorFromTargetID(
-      getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
+  if (DriverArgs.hasArg(options::OPT_mcpu_EQ))
+    return getProcessorFromTargetID(
+        getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
+  if (DriverArgs.hasArg(options::OPT_march_EQ))
+    return getProcessorFromTargetID(
+        getTriple(), DriverArgs.getLastArgValue(options::OPT_march_EQ));
+  return "";
 }
 
 AMDGPUToolChain::ParsedTargetIDType
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to