[clang] [llvm] [LinkerWrapper] Always pass `-flto` if the linker supports it (PR #102972)

Joseph Huber via cfe-commits Tue, 13 Aug 2024 08:36:50 -0700

https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/102972


>From 4e2834e33249f16b2f30574020c60e5bb62fe4b9 Mon Sep 17 00:00:00 2001
From: Joseph Huber <[email protected]>
Date: Mon, 12 Aug 2024 15:10:03 -0500
Subject: [PATCH] [LinkerWrapper] Always pass `-flto` if the linker supports it

Summary:
Now that we use the linker to do LTO / device linking, we need to inform
the `clang` invocation to use `-flto` so it forwards arguments like
`-On` correctly.
---
 clang/test/Driver/linker-wrapper.c            | 20 +++++++++----------
 .../ClangLinkerWrapper.cpp                    |  1 +
 .../api/omp_dynamic_shared_memory_amdgpu.c    |  2 +-
 .../omp_dynamic_shared_memory_mixed_amdgpu.c  |  2 +-
 offload/test/jit/empty_kernel_lvl2.c          |  8 --------
 offload/test/offloading/bug51781.c            |  6 +++---
 offload/test/offloading/bug51982.c            |  6 +++---
 7 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/clang/test/Driver/linker-wrapper.c 
b/clang/test/Driver/linker-wrapper.c
index e70715d2a9bd7e..068ea2d7d3c663 100644
--- a/clang/test/Driver/linker-wrapper.c
+++ b/clang/test/Driver/linker-wrapper.c
@@ -21,7 +21,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=NVPTX-LINK
 
-// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda 
-march=sm_70 -O2 {{.*}}.o {{.*}}.o
+// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda 
-march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   
--image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
@@ -30,7 +30,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run 
--device-debug -O0 \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=NVPTX-LINK-DEBUG
 
-// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda 
-march=sm_70 -O2 {{.*}}.o {{.*}}.o -g 
+// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda 
-march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o -g 
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   
--image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
@@ -39,7 +39,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=AMDGPU-LINK
 
-// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   
--image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
@@ -48,7 +48,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run 
--save-temps -O2 \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=AMDGPU-LTO-TEMPS
 
-// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.o -save-temps
+// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx1030 -O2 -flto -Wl,--no-undefined {{.*}}.o -save-temps
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
@@ -59,7 +59,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN:   --linker-path=/usr/bin/ld.lld --whole-archive %t.a 
--no-whole-archive \
 // RUN:   %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU-LINK
 
-// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu 
-march=native -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared 
-Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive
+// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu 
-march=native -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic 
-shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive
 
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o
 // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu 
-mllvm -openmp-opt-disable \
@@ -148,7 +148,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run 
--clang-backend \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=CLANG-BACKEND
 
-// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o
+// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   
--image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
@@ -171,8 +171,8 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | 
FileCheck %s --check-prefix=AMD-TARGET-ID
 
-// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx90a:xnack+ -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
-// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx90a:xnack- -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa 
-mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
 
 // RUN: clang-offload-packager -o %t-lib.out \
 // RUN:   
--image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic
@@ -187,8 +187,8 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck 
%s --check-prefix=ARCH-ALL
 
-// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a 
-O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
-// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 
-O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a 
-O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 
-O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp 
b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 52e6809a122706..9fea1fdcd5fb46 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -527,6 +527,7 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, 
const ArgList &Args) {
 
   // Forward all of the `--offload-opt` and similar options to the device.
   if (linkerSupportsLTO(Args)) {
+    CmdArgs.push_back("-flto");
     for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm))
       CmdArgs.append(
           {"-Xlinker",
diff --git a/offload/test/api/omp_dynamic_shared_memory_amdgpu.c 
b/offload/test/api/omp_dynamic_shared_memory_amdgpu.c
index 0b4d9d6ea9d46e..7ddb13f40edc76 100644
--- a/offload/test/api/omp_dynamic_shared_memory_amdgpu.c
+++ b/offload/test/api/omp_dynamic_shared_memory_amdgpu.c
@@ -1,4 +1,4 @@
-// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 -mllvm 
-openmp-opt-inline-device
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O2 -mllvm 
-openmp-opt-inline-device
 // RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \
 // RUN:   %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
 // REQUIRES: amdgcn-amd-amdhsa
diff --git a/offload/test/api/omp_dynamic_shared_memory_mixed_amdgpu.c 
b/offload/test/api/omp_dynamic_shared_memory_mixed_amdgpu.c
index 656c3a20aaf82a..175f15cf7eb405 100644
--- a/offload/test/api/omp_dynamic_shared_memory_mixed_amdgpu.c
+++ b/offload/test/api/omp_dynamic_shared_memory_mixed_amdgpu.c
@@ -1,4 +1,4 @@
-// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 -mllvm 
-openmp-opt-inline-device -I %S
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O2 -mllvm 
-openmp-opt-inline-device -I %S
 // RUN: env LIBOMPTARGET_NEXTGEN_PLUGINS=1 \
 // RUN:   %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
 // REQUIRES: amdgcn-amd-amdhsa
diff --git a/offload/test/jit/empty_kernel_lvl2.c 
b/offload/test/jit/empty_kernel_lvl2.c
index 90e4ce321fe8ae..73f8c4abb9bb13 100644
--- a/offload/test/jit/empty_kernel_lvl2.c
+++ b/offload/test/jit/empty_kernel_lvl2.c
@@ -21,7 +21,6 @@
 // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll     \
 // RUN:     LIBOMPTARGET_JIT_SKIP_OPT=true                   \
 // RUN:     %libomptarget-run-generic
-// TODO:
 // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc 
--check-prefix=FIRST
 // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \
 // RUN:     -DTGT1_DIRECTIVE="target"                          \
@@ -30,7 +29,6 @@
 // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll     \
 // RUN:     LIBOMPTARGET_JIT_SKIP_OPT=true                   \
 // RUN:     %libomptarget-run-generic
-// TODO:
 // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc 
--check-prefixes=FIRST,SECOND
 //
 // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \
@@ -39,7 +37,6 @@
 // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll     \
 // RUN:     LIBOMPTARGET_JIT_SKIP_OPT=true                   \
 // RUN:     %libomptarget-run-generic
-// TODO:
 // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc 
--check-prefix=FIRST
 // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \
 // RUN:     -DTGT1_DIRECTIVE="target teams"                    \
@@ -48,7 +45,6 @@
 // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll     \
 // RUN:     LIBOMPTARGET_JIT_SKIP_OPT=true                   \
 // RUN:     %libomptarget-run-generic
-// TODO:
 // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc 
--check-prefixes=FIRST,SECOND
 //
 // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \
@@ -57,7 +53,6 @@
 // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll     \
 // RUN:     LIBOMPTARGET_JIT_SKIP_OPT=true                   \
 // RUN:     %libomptarget-run-generic
-// TODO:
 // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc 
--check-prefix=FIRST
 // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \
 // RUN:     -DTGT1_DIRECTIVE="target teams"                    \
@@ -66,7 +61,6 @@
 // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll     \
 // RUN:     LIBOMPTARGET_JIT_SKIP_OPT=true                   \
 // RUN:     %libomptarget-run-generic
-// TODO:
 // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc 
--check-prefixes=FIRST,SECOND
 //
 // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \
@@ -75,7 +69,6 @@
 // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll     \
 // RUN:     LIBOMPTARGET_JIT_SKIP_OPT=true                   \
 // RUN:     %libomptarget-run-generic
-// TODO:
 // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc 
--check-prefix=FIRST
 // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \
 // RUN:     -DTGT1_DIRECTIVE="target teams"                    \
@@ -84,7 +77,6 @@
 // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll     \
 // RUN:     LIBOMPTARGET_JIT_SKIP_OPT=true                   \
 // RUN:     %libomptarget-run-generic
-// TODO:
 // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc 
--check-prefixes=FIRST,SECOND
 // clang-format on
 
diff --git a/offload/test/offloading/bug51781.c 
b/offload/test/offloading/bug51781.c
index 35ecf55aa8c534..17b7499a7606e4 100644
--- a/offload/test/offloading/bug51781.c
+++ b/offload/test/offloading/bug51781.c
@@ -5,7 +5,7 @@
 
 // SPMDize.  There is no main thread, so there's no issue.
 //
-// RUN: %libomptarget-compile-generic -O1 -Rpass=openmp-opt > %t.spmd 2>&1
+// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt > %t.spmd 2>&1
 // RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=SPMD -input-file=%t.spmd
 // RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=SPMD -input-file=%t.spmd
 // RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
@@ -15,7 +15,7 @@
 // Use the custom state machine, which must avoid the same barrier problem as
 // the generic state machine.
 //
-// RUN: %libomptarget-compile-generic -O1 -Rpass=openmp-opt \
+// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt \
 // RUN:   -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
 // RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
 // RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
@@ -24,7 +24,7 @@
 // Repeat with reduction clause, which has managed to break the custom state
 // machine in the past.
 //
-// RUN: %libomptarget-compile-generic -O1 -Rpass=openmp-opt -DADD_REDUCTION \
+// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt -DADD_REDUCTION \
 // RUN:   -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
 // RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
 // RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
diff --git a/offload/test/offloading/bug51982.c 
b/offload/test/offloading/bug51982.c
index 91ce4a264e2382..b19707aacde983 100644
--- a/offload/test/offloading/bug51982.c
+++ b/offload/test/offloading/bug51982.c
@@ -1,6 +1,6 @@
-// RUN: %libomptarget-compile-generic -O1 && %libomptarget-run-generic
-// -O1 to run openmp-opt
-// RUN: %libomptarget-compileopt-generic -O1 && %libomptarget-run-generic
+// RUN: %libomptarget-compile-generic -O2 && %libomptarget-run-generic
+// -O2 to run openmp-opt
+// RUN: %libomptarget-compileopt-generic -O2 && %libomptarget-run-generic
 
 int main(void) {
   long int aa = 0;

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [LinkerWrapper] Always pass `-flto` if the linker supports it (PR #102972)

Reply via email to