https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/202699

>From 230a76e5fe5b54c88a1d9fc5fe2c9515e0bb13d6 Mon Sep 17 00:00:00 2001
From: Joseph Huber <[email protected]>
Date: Tue, 9 Jun 2026 10:53:33 -0500
Subject: [PATCH] [HIP] Fix `-flto` overriding `--no-lto` now that it is
 default

Summary:
The previous changes to LTO made the `-flto` flag passed by default, which
overrode the hack we did to revert to the old non-LTO pipeline. This is a
temporary hack so I'm hacking it even further to fix it.
---
 .../linker-wrapper-hip-no-rdc.c                 |  6 +++---
 .../clang-linker-wrapper/linker-wrapper.c       | 14 +++++++-------
 .../clang-linker-wrapper/ClangLinkerWrapper.cpp | 17 ++++++-----------
 3 files changed, 16 insertions(+), 21 deletions(-)

diff --git 
a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c 
b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c
index 5c5b7b1eabfab..80ac493825aad 100644
--- a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c
+++ b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c
@@ -59,11 +59,11 @@ __attribute__((visibility("protected"), used)) int x;
 // Without --no-lto the AMDGPU device compilation uses the LTO pipeline
 // (-flto).
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu 
--wrapper-verbose --dry-run --emit-fatbin-only --linker-path=/usr/bin/ld %t.out 
-o %t.lto.hipfb 2>&1 | FileCheck %s --check-prefix=LTO
-// LTO: clang{{.*}} -mcpu=gfx1200{{.*}} -flto
+// LTO: clang{{.*}} -mcpu=gfx1200
 
 // With --no-lto the AMDGPU device compilation uses the conventional non-LTO
 // pipeline: -flto must not be passed, and '-x ir' must be passed so Clang
 // compiles the bitcode (stored in an object-extension file) instead of
 // handing it to the LTO link.
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu 
--wrapper-verbose --dry-run --no-lto --emit-fatbin-only 
--linker-path=/usr/bin/ld %t.out -o %t.nolto.hipfb 2>&1 | FileCheck %s 
--check-prefix=NO-LTO --implicit-check-not=-flto
-// NO-LTO: clang{{.*}} -mcpu=gfx1200{{.*}} -x ir
+// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu 
--wrapper-verbose --dry-run --no-lto --emit-fatbin-only 
--linker-path=/usr/bin/ld %t.out -o %t.nolto.hipfb 2>&1 | FileCheck %s 
--check-prefix=NO-LTO
+// NO-LTO: clang{{.*}} -mcpu=gfx1200{{.*}} -x ir {{.*}}-flto=none
diff --git a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c 
b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c
index 39b9bcd7425ab..a19a11e87afdb 100644
--- a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c
+++ b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c
@@ -40,7 +40,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=AMDGPU-LINK
 
-// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. 
--target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o 
{{.*}}.o
+// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. 
--target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o {{.*}}.o
 
 // RUN: llvm-offload-binary -o %t.out \
 // RUN:   
--image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
@@ -49,7 +49,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run 
--device-compiler=--save-temps \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=AMDGPU-LTO-TEMPS
 
-// AMDGPU-LTO-TEMPS: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx1030 
-flto {{.*}}-save-temps
+// AMDGPU-LTO-TEMPS: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx1030 
{{.*}}-save-temps
 
 // RUN: llvm-offload-binary -o %t.out \
 // RUN:   
--image=file=%t.spirv.bc,kind=sycl,triple=spirv64-unknown-unknown,arch=generic
@@ -159,7 +159,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run 
--clang-backend \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=CLANG-BACKEND
 
-// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. 
--target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o
+// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. 
--target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o
 
 // RUN: llvm-offload-binary -o %t.out \
 // RUN:   
--image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
@@ -182,8 +182,8 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | 
FileCheck %s --check-prefix=AMD-TARGET-ID
 
-// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir 
a.out.amdgcn.gfx90a:xnack+.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ 
-flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
-// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir 
a.out.amdgcn.gfx90a:xnack-.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- 
-flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir 
a.out.amdgcn.gfx90a:xnack+.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ 
-Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir 
a.out.amdgcn.gfx90a:xnack-.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- 
-Wl,--no-undefined {{.*}}.o {{.*}}.o
 
 // RUN: llvm-offload-binary -o %t-lib.out \
 // RUN:   
--image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic
@@ -198,8 +198,8 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck 
%s --check-prefix=ARCH-ALL
 
-// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a.img. 
--target=amdgcn-amd-amdhsa -mcpu=gfx90a -flto -Wl,--no-undefined {{.*}}.o 
{{.*}}.o
-// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. 
--target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o 
{{.*}}.o
+// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a.img. 
--target=amdgcn-amd-amdhsa -mcpu=gfx90a -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. 
--target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o {{.*}}.o
 
 // RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp 
b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 6e4fc7060389c..cfdd11e1d298d 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -534,16 +534,6 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, 
const ArgList &Args,
     Triple.isAMDGPU() ? CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch))
                       : CmdArgs.push_back(Args.MakeArgString("-march=" + 
Arch));
 
-  // AMDGPU defaults to the LTO pipeline. Non-RDC HIP uses the conventional
-  // non-LTO pipeline so device codegen still runs here, in parallel, instead
-  // of being deferred to the LTO link.
-  // FIXME: This is a stop-gap for non-RDC. Longer term, RDC and non-RDC should
-  // share a unified interface so runtime libraries can be provided to non-RDC
-  // compilations without relying on -mlink-builtin-bitcode.
-  bool NonLTOAMDGPU = Triple.isAMDGPU() && Args.hasArg(OPT_no_lto);
-  if (Triple.isAMDGPU() && !NonLTOAMDGPU)
-    CmdArgs.push_back("-flto");
-
   // Forward all of the `--offload-opt` and `-mllvm` options to the device.
   for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm))
     CmdArgs.append(
@@ -557,7 +547,9 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, 
const ArgList &Args,
   // Force the IR input language so Clang runs the compile and backend phases
   // instead of treating them as linker inputs, which would defer codegen to
   // the LTO link and defeat the non-LTO pipeline.
-  if (NonLTOAMDGPU)
+  // FIXME: This is a stop-gap for non-RDC. Longer term, RDC and non-RDC should
+  //        share a unified interface.
+  if (Args.hasArg(OPT_no_lto))
     CmdArgs.append({"-x", "ir"});
   for (StringRef InputFile : InputFiles)
     CmdArgs.push_back(InputFile);
@@ -621,6 +613,9 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, 
const ArgList &Args,
   for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ))
     CmdArgs.push_back(Args.MakeArgString(Arg));
 
+  if (Args.hasArg(OPT_no_lto))
+    CmdArgs.append({"-flto=none", "-Wno-unused-command-line-argument"});
+
   if (Error Err = executeCommands(*ClangPath, CmdArgs))
     return std::move(Err);
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to