https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/133164

>From 8c8885d486ac72145d7fd7ba003db7ea9ab17a1f Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Wed, 26 Mar 2025 16:03:20 -0500
Subject: [PATCH 1/3] [Clang] Make `--lto-partitions` only default for HIP

Summary:
The default behavior for LTO on other targets does not specify the
number of LTO partitions. Recent changes made this default to 8 on
AMDGPU which had some issues with the `libc` project. The option to
disable this is HIP only so I think for now we should restrict this just
to HIP.

I'm definitely on board with getting some more parallelism here, but I
think it should probably be restricted to just offloading languages. The
new driver goes through the `--target=amdgcn-amd-amdhsa` for its output,
which means we'd need to forward the default somehow.
---
 clang/lib/Driver/ToolChains/AMDGPU.cpp | 22 ++++++++--------------
 clang/test/Driver/amdgpu-toolchain.c   | 16 +---------------
 libc/startup/gpu/CMakeLists.txt        | 10 ++--------
 3 files changed, 11 insertions(+), 37 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index e919f4e941f47..c754cae2f96b2 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -625,6 +625,7 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
     CmdArgs.push_back("-shared");
   }
 
+  llvm::errs() << JA.getOffloadingDeviceKind() << "\n";
   addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
   Args.AddAllArgs(CmdArgs, options::OPT_L);
   getToolChain().AddFilePathLibArgs(Args, CmdArgs);
@@ -633,7 +634,7 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
     const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin);
     addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], ThinLTO);
 
-    if (!ThinLTO)
+    if (!ThinLTO && JA.getOffloadingDeviceKind() == Action::OFK_HIP)
       addFullLTOPartitionOption(C.getDriver(), Args, CmdArgs);
   } else if (Args.hasArg(options::OPT_mcpu_EQ)) {
     CmdArgs.push_back(Args.MakeArgString(
@@ -712,14 +713,12 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
 }
 
 static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
-  const Arg *A = Args.getLastArg(options::OPT_flto_partitions_EQ);
-  // In the absence of an option, use 8 as the default.
-  if (!A)
-    return 8;
   int Value = 0;
-  if (StringRef(A->getValue()).getAsInteger(10, Value) || (Value < 1)) {
+  StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
+  if (A.getAsInteger(10, Value) || (Value < 1)) {
+    Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
     D.Diag(diag::err_drv_invalid_int_value)
-        << A->getAsString(Args) << A->getValue();
+        << Arg->getAsString(Args) << Arg->getValue();
     return 1;
   }
 
@@ -729,13 +728,8 @@ static unsigned getFullLTOPartitions(const Driver &D, 
const ArgList &Args) {
 void amdgpu::addFullLTOPartitionOption(const Driver &D,
                                        const llvm::opt::ArgList &Args,
                                        llvm::opt::ArgStringList &CmdArgs) {
-  // TODO: Should this be restricted to fgpu-rdc only ? Currently we'll
-  //       also do it for non gpu-rdc LTO
-
-  if (unsigned NumParts = getFullLTOPartitions(D, Args); NumParts > 1) {
-    CmdArgs.push_back(
-        Args.MakeArgString("--lto-partitions=" + Twine(NumParts)));
-  }
+  CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" +
+                                       Twine(getFullLTOPartitions(D, Args))));
 }
 
 /// AMDGPU Toolchain
diff --git a/clang/test/Driver/amdgpu-toolchain.c 
b/clang/test/Driver/amdgpu-toolchain.c
index 6617108e59fcf..20656f1fadeb7 100644
--- a/clang/test/Driver/amdgpu-toolchain.c
+++ b/clang/test/Driver/amdgpu-toolchain.c
@@ -21,7 +21,7 @@
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- 
-nogpulib \
 // RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck 
-check-prefix=LTO %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: 
ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"--lto-partitions={{[0-9]+}}"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
+// LTO: 
ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- 
-nogpulib \
 // RUN:   -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
@@ -38,17 +38,3 @@
 // RUN: %clang -target amdgcn-amd-amdhsa -march=gfx90a -stdlib -startfiles \
 // RUN:   -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=STARTUP %s
 // STARTUP: ld.lld{{.*}}"-lc" "-lm" "{{.*}}crt1.o"
-
-// Check --flto-partitions
-
-// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
-// RUN:   -L. -flto --flto-partitions=42 %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS %s
-// LTO_PARTS: 
ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"--lto-partitions=42"
-
-// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
-// RUN:   -L. -flto --flto-partitions=a %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS_INV0 %s
-// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in 
'--flto-partitions=a'
-
-// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
-// RUN:   -L. -flto --flto-partitions=0 %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS_INV1 %s
-// LTO_PARTS_INV1: clang: error: invalid integral value '0' in 
'--flto-partitions=0'
diff --git a/libc/startup/gpu/CMakeLists.txt b/libc/startup/gpu/CMakeLists.txt
index a30e6f07a66a3..fa326ef46a9d1 100644
--- a/libc/startup/gpu/CMakeLists.txt
+++ b/libc/startup/gpu/CMakeLists.txt
@@ -33,14 +33,8 @@ function(add_startup_object name)
     set_target_properties(${fq_target_name}.exe PROPERTIES
       RUNTIME_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR}
       RUNTIME_OUTPUT_NAME ${name}.o)
-    # FIXME: A bug in the AMDGPU LTO pass is incorrectly removing the kernels.
-    if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
-      target_link_options(${fq_target_name}.exe PRIVATE
-                          "-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm")
-    else()
-      target_link_options(${fq_target_name}.exe PRIVATE
-                          "-r" "-nostdlib" "-Wl,--lto-emit-llvm")
-    endif()
+    target_link_options(${fq_target_name}.exe PRIVATE
+                        "-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm")
   endif()
 endfunction()
 

>From 05c09d3e638e458f22a44180a995dd21f47c73a9 Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Wed, 26 Mar 2025 16:52:46 -0500
Subject: [PATCH 2/3] test

---
 clang/test/Driver/hip-toolchain-rdc.hip | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/clang/test/Driver/hip-toolchain-rdc.hip 
b/clang/test/Driver/hip-toolchain-rdc.hip
index 9015702e3211a..13b50353fc20c 100644
--- a/clang/test/Driver/hip-toolchain-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-rdc.hip
@@ -161,3 +161,17 @@
 // output the executable
 // LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] 
[[B_OBJ_HOST]] [[OBJBUNDLE]]
 // MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] 
[[B_OBJ_HOST]] [[OBJBUNDLE]]
+
+// Check --flto-partitions
+
+// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
+// RUN:   -L. -flto --flto-partitions=42 %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS %s
+// LTO_PARTS: lld{{.*}}"--lto-partitions=42"
+
+// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
+// RUN:   -L. -flto --flto-partitions=a %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS_INV0 %s
+// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in 
'--flto-partitions=a'
+
+// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
+// RUN:   -L. -flto --flto-partitions=0 %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS_INV1 %s
+// LTO_PARTS_INV1: clang: error: invalid integral value '0' in 
'--flto-partitions=0'

>From 00abe864e160f093927f74b9023973fa2674f4bd Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Wed, 26 Mar 2025 17:00:14 -0500
Subject: [PATCH 3/3] update

---
 clang/test/Driver/hip-toolchain-rdc.hip | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/clang/test/Driver/hip-toolchain-rdc.hip 
b/clang/test/Driver/hip-toolchain-rdc.hip
index 13b50353fc20c..d5119e71a3569 100644
--- a/clang/test/Driver/hip-toolchain-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-rdc.hip
@@ -165,13 +165,17 @@
 // Check --flto-partitions
 
 // RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
-// RUN:   -L. -flto --flto-partitions=42 %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS %s
+// RUN:   -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT %s
+// LTO_DEFAULT: lld{{.*}}"--lto-partitions=8"
+
+// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
+// RUN:   -L. -foffload-lto --flto-partitions=42 %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS %s
 // LTO_PARTS: lld{{.*}}"--lto-partitions=42"
 
 // RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
-// RUN:   -L. -flto --flto-partitions=a %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS_INV0 %s
+// RUN:   -L. -foffload-lto --flto-partitions=a %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS_INV0 %s
 // LTO_PARTS_INV0: clang: error: invalid integral value 'a' in 
'--flto-partitions=a'
 
 // RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
-// RUN:   -L. -flto --flto-partitions=0 %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS_INV1 %s
+// RUN:   -L. -foffload-lto --flto-partitions=0 %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS_INV1 %s
 // LTO_PARTS_INV1: clang: error: invalid integral value '0' in 
'--flto-partitions=0'

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to