https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/102972
>From 4e2834e33249f16b2f30574020c60e5bb62fe4b9 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Mon, 12 Aug 2024 15:10:03 -0500 Subject: [PATCH] [LinkerWrapper] Always pass `-flto` if the linker supports it Summary: Now that we use the linker to do LTO / device linking, we need to inform the `clang` invocation to use `-flto` so it forwards arguments like `-On` correctly. --- clang/test/Driver/linker-wrapper.c | 20 +++++++++---------- .../ClangLinkerWrapper.cpp | 1 + .../api/omp_dynamic_shared_memory_amdgpu.c | 2 +- .../omp_dynamic_shared_memory_mixed_amdgpu.c | 2 +- offload/test/jit/empty_kernel_lvl2.c | 8 -------- offload/test/offloading/bug51781.c | 6 +++--- offload/test/offloading/bug51982.c | 6 +++--- 7 files changed, 19 insertions(+), 26 deletions(-) diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index e70715d2a9bd7e..068ea2d7d3c663 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -21,7 +21,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK -// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 {{.*}}.o {{.*}}.o +// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ @@ -30,7 +30,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG -// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 {{.*}}.o {{.*}}.o -g +// NVPTX-LINK-DEBUG: 
clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o -g // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ @@ -39,7 +39,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK -// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \ @@ -48,7 +48,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS -// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.o -save-temps +// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -flto -Wl,--no-undefined {{.*}}.o -save-temps // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ @@ -59,7 +59,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --linker-path=/usr/bin/ld.lld --whole-archive %t.a --no-whole-archive \ // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU-LINK -// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -march=native -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive +// CPU-LINK: clang{{.*}} -o {{.*}}.img 
--target=x86_64-unknown-linux-gnu -march=native -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -mllvm -openmp-opt-disable \ @@ -148,7 +148,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND -// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o +// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 @@ -171,8 +171,8 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID -// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o -// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t-lib.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic @@ -187,8 +187,8 @@ __attribute__((visibility("protected"), used)) int x; // 
RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL -// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o -// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o +// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 52e6809a122706..9fea1fdcd5fb46 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -527,6 +527,7 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) { // Forward all of the `--offload-opt` and similar options to the device. 
if (linkerSupportsLTO(Args)) { + CmdArgs.push_back("-flto"); for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) CmdArgs.append( {"-Xlinker", diff --git a/offload/test/api/omp_dynamic_shared_memory_amdgpu.c b/offload/test/api/omp_dynamic_shared_memory_amdgpu.c index 0b4d9d6ea9d46e..7ddb13f40edc76 100644 --- a/offload/test/api/omp_dynamic_shared_memory_amdgpu.c +++ b/offload/test/api/omp_dynamic_shared_memory_amdgpu.c @@ -1,4 +1,4 @@ -// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 -mllvm -openmp-opt-inline-device +// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O2 -mllvm -openmp-opt-inline-device // RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \ // RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa // REQUIRES: amdgcn-amd-amdhsa diff --git a/offload/test/api/omp_dynamic_shared_memory_mixed_amdgpu.c b/offload/test/api/omp_dynamic_shared_memory_mixed_amdgpu.c index 656c3a20aaf82a..175f15cf7eb405 100644 --- a/offload/test/api/omp_dynamic_shared_memory_mixed_amdgpu.c +++ b/offload/test/api/omp_dynamic_shared_memory_mixed_amdgpu.c @@ -1,4 +1,4 @@ -// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 -mllvm -openmp-opt-inline-device -I %S +// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O2 -mllvm -openmp-opt-inline-device -I %S // RUN: env LIBOMPTARGET_NEXTGEN_PLUGINS=1 \ // RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa // REQUIRES: amdgcn-amd-amdhsa diff --git a/offload/test/jit/empty_kernel_lvl2.c b/offload/test/jit/empty_kernel_lvl2.c index 90e4ce321fe8ae..73f8c4abb9bb13 100644 --- a/offload/test/jit/empty_kernel_lvl2.c +++ b/offload/test/jit/empty_kernel_lvl2.c @@ -21,7 +21,6 @@ // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \ // RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \ // RUN: %libomptarget-run-generic -// TODO: // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefix=FIRST // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \ // RUN: 
-DTGT1_DIRECTIVE="target" \ @@ -30,7 +29,6 @@ // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \ // RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \ // RUN: %libomptarget-run-generic -// TODO: // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefixes=FIRST,SECOND // // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \ @@ -39,7 +37,6 @@ // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \ // RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \ // RUN: %libomptarget-run-generic -// TODO: // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefix=FIRST // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \ // RUN: -DTGT1_DIRECTIVE="target teams" \ @@ -48,7 +45,6 @@ // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \ // RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \ // RUN: %libomptarget-run-generic -// TODO: // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefixes=FIRST,SECOND // // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \ @@ -57,7 +53,6 @@ // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \ // RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \ // RUN: %libomptarget-run-generic -// TODO: // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefix=FIRST // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \ // RUN: -DTGT1_DIRECTIVE="target teams" \ @@ -66,7 +61,6 @@ // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \ // RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \ // RUN: %libomptarget-run-generic -// TODO: // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefixes=FIRST,SECOND // // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \ @@ -75,7 +69,6 @@ // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \ // RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \ // RUN: %libomptarget-run-generic -// TODO: // RUN: not %fcheck-plain-generic --input-file %t.pre.ll 
%S/empty_kernel.inc --check-prefix=FIRST // RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \ // RUN: -DTGT1_DIRECTIVE="target teams" \ @@ -84,7 +77,6 @@ // RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \ // RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \ // RUN: %libomptarget-run-generic -// TODO: // RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefixes=FIRST,SECOND // clang-format on diff --git a/offload/test/offloading/bug51781.c b/offload/test/offloading/bug51781.c index 35ecf55aa8c534..17b7499a7606e4 100644 --- a/offload/test/offloading/bug51781.c +++ b/offload/test/offloading/bug51781.c @@ -5,7 +5,7 @@ // SPMDize. There is no main thread, so there's no issue. // -// RUN: %libomptarget-compile-generic -O1 -Rpass=openmp-opt > %t.spmd 2>&1 +// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt > %t.spmd 2>&1 // RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=SPMD -input-file=%t.spmd // RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=SPMD -input-file=%t.spmd // RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic @@ -15,7 +15,7 @@ // Use the custom state machine, which must avoid the same barrier problem as // the generic state machine. // -// RUN: %libomptarget-compile-generic -O1 -Rpass=openmp-opt \ +// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt \ // RUN: -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1 // RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom // RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom @@ -24,7 +24,7 @@ // Repeat with reduction clause, which has managed to break the custom state // machine in the past. 
// -// RUN: %libomptarget-compile-generic -O1 -Rpass=openmp-opt -DADD_REDUCTION \ +// RUN: %libomptarget-compile-generic -O2 -Rpass=openmp-opt -DADD_REDUCTION \ // RUN: -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1 // RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom // RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom diff --git a/offload/test/offloading/bug51982.c b/offload/test/offloading/bug51982.c index 91ce4a264e2382..b19707aacde983 100644 --- a/offload/test/offloading/bug51982.c +++ b/offload/test/offloading/bug51982.c @@ -1,6 +1,6 @@ -// RUN: %libomptarget-compile-generic -O1 && %libomptarget-run-generic -// -O1 to run openmp-opt -// RUN: %libomptarget-compileopt-generic -O1 && %libomptarget-run-generic +// RUN: %libomptarget-compile-generic -O2 && %libomptarget-run-generic +// -O2 to run openmp-opt +// RUN: %libomptarget-compileopt-generic -O2 && %libomptarget-run-generic int main(void) { long int aa = 0; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits