[clang] Pass -offload-lto instead of -lto for cuda/hip kernels (PR #125243)
https://github.com/omarahmed updated https://github.com/llvm/llvm-project/pull/125243 >From f3d466bffc2113b54d62e9a45370c643800b218c Mon Sep 17 00:00:00 2001 From: omarahmed Date: Fri, 31 Jan 2025 15:42:11 + Subject: [PATCH] Pass -offload-lto instead of -lto for cuda/hip kernels --- clang/test/Driver/linker-wrapper.c | 18 +- .../ClangLinkerWrapper.cpp | 6 +- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index f416ee5f4463bcc..0a2bc16a15ae492 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -21,7 +21,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK -// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o +// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -foffload-lto {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ @@ -30,7 +30,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG -// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o -g +// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -foffload-lto {{.*}}.o {{.*}}.o -g // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ @@ -39,7 +39,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK -// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \ @@ -48,7 +48,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS -// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -flto -Wl,--no-undefined {{.*}}.o -save-temps +// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o -save-temps // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ @@ -148,7 +148,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND -// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o +// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 @@ -171,8 +171,8 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID -// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t-lib.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic @@ -187,8 +187,8 @@ __
[clang] Pass -offload-lto instead of -lto for cuda/hip kernels (PR #125243)
@@ -498,12 +498,17 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { }; // Forward all of the `--offload-opt` and similar options to the device. - CmdArgs.push_back("-flto"); for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) CmdArgs.append( {"-Xlinker", Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); + if (Triple.isNVPTX() || Triple.isAMDGPU()) { omarahmed wrote: Fixed https://github.com/llvm/llvm-project/pull/125243 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Pass -offload-lto instead of -lto for cuda/hip kernels (PR #125243)
@@ -498,12 +498,16 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { }; // Forward all of the `--offload-opt` and similar options to the device. - CmdArgs.push_back("-flto"); for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) CmdArgs.append( {"-Xlinker", Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); + if (Triple.isNVPTX() || Triple.isAMDGPU()) +CmdArgs.push_back("-foffload-lto"); + else +CmdArgs.push_back("-flto"); omarahmed wrote: This is the original issue: https://github.com/intel/llvm/issues/16413 . It has some more details. I could try to see if I could come up with a concise reproducer for this. https://github.com/llvm/llvm-project/pull/125243 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Pass -offload-lto instead of -lto for cuda/hip kernels (PR #125243)
@@ -498,12 +498,16 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { }; // Forward all of the `--offload-opt` and similar options to the device. - CmdArgs.push_back("-flto"); for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) CmdArgs.append( {"-Xlinker", Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); + if (Triple.isNVPTX() || Triple.isAMDGPU()) +CmdArgs.push_back("-foffload-lto"); + else +CmdArgs.push_back("-flto"); omarahmed wrote: It was breaking some tests where one of the pipeline commands was compiling using clang from llvm IR to ptx but the output from this was the input llvm IR (I think that means something have failed). After investigation, it appeared that `-flto` is the reason for this breaking behaviour. I don't have strong opinions on using `-foffload-lto`, but I think if `-flto` is not needed, we could avoid using it entirely for CUDA/AMD kernels. https://github.com/llvm/llvm-project/pull/125243 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Pass -offload-lto instead of -lto for cuda/hip kernels (PR #125243)
@@ -498,12 +498,16 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { }; // Forward all of the `--offload-opt` and similar options to the device. - CmdArgs.push_back("-flto"); for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) CmdArgs.append( {"-Xlinker", Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); + if (Triple.isNVPTX() || Triple.isAMDGPU()) +CmdArgs.push_back("-foffload-lto"); + else +CmdArgs.push_back("-flto"); omarahmed wrote: I managed to get similar behaviour from this small example: https://godbolt.org/z/xaTfGrajd https://github.com/llvm/llvm-project/pull/125243 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Pass -offload-lto instead of -lto for cuda/hip kernels (PR #125243)
@@ -498,12 +498,16 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { }; // Forward all of the `--offload-opt` and similar options to the device. - CmdArgs.push_back("-flto"); for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) CmdArgs.append( {"-Xlinker", Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); + if (Triple.isNVPTX() || Triple.isAMDGPU()) +CmdArgs.push_back("-foffload-lto"); + else +CmdArgs.push_back("-flto"); omarahmed wrote: It is the fork for intel/llvm ``` > clang --version clang version 20.0.0git (https://github.com/intel/llvm.git 0b71135afbca78ad5095ff09868e8f966755742a) Target: x86_64-unknown-linux-gnu Thread model: posix InstalledDir: build2/bin Build config: +assertions ``` https://github.com/llvm/llvm-project/pull/125243 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Pass -offload-lto instead of -lto for cuda/hip kernels (PR #125243)
https://github.com/omarahmed updated https://github.com/llvm/llvm-project/pull/125243 >From fcfe4fafa937b6320e779f56c2ba42327df143d4 Mon Sep 17 00:00:00 2001 From: omarahmed Date: Fri, 31 Jan 2025 15:42:11 + Subject: [PATCH] Pass -offload-lto instead of -lto for cuda/hip kernels --- clang/test/Driver/linker-wrapper.c | 18 +- .../ClangLinkerWrapper.cpp | 7 ++- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index f416ee5f4463bcc..0a2bc16a15ae492 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -21,7 +21,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK -// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o +// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -foffload-lto {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ @@ -30,7 +30,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG -// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o -g +// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -foffload-lto {{.*}}.o {{.*}}.o -g // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ @@ -39,7 +39,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK -// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \ @@ -48,7 +48,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS -// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -flto -Wl,--no-undefined {{.*}}.o -save-temps +// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o -save-temps // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ @@ -148,7 +148,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND -// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o +// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 @@ -171,8 +171,8 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID -// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t-lib.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic @@ -187,8 +187,8 @@ _
[clang] Pass -offload-lto instead of -lto for cuda/hip kernels (PR #125243)
https://github.com/omarahmed created https://github.com/llvm/llvm-project/pull/125243 ClangLinkerWrapper tool in one of its clang commands to generate ptx kernel binary from llvm bitcode kernel was using -flto option which should be only used for cpu code not gpu kernel code. This PR fixes that by changing that to -foffload-lto for cuda/hip kernels. >From b771905671c80912aceeec414a49fe4581c9b796 Mon Sep 17 00:00:00 2001 From: omarahmed Date: Fri, 31 Jan 2025 15:42:11 + Subject: [PATCH] Pass -offload-lto instead of -lto for cuda/hip kernels --- clang/test/Driver/linker-wrapper.c | 18 +- .../ClangLinkerWrapper.cpp | 7 ++- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index f416ee5f4463bcc..0a2bc16a15ae492 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -21,7 +21,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK -// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o +// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -foffload-lto {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ @@ -30,7 +30,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG -// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o -g +// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -foffload-lto {{.*}}.o {{.*}}.o -g // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ @@ -39,7 +39,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK -// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \ @@ -48,7 +48,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS -// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -flto -Wl,--no-undefined {{.*}}.o -save-temps +// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o -save-temps // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ @@ -148,7 +148,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND -// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o +// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 @@ -171,8 +171,8 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID -// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMD-TARGET-ID: clang{{.*}} -o
[clang] Pass -offload-lto instead of -lto for cuda/hip kernels (PR #125243)
@@ -498,12 +498,16 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { }; // Forward all of the `--offload-opt` and similar options to the device. - CmdArgs.push_back("-flto"); for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) CmdArgs.append( {"-Xlinker", Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); + if (Triple.isNVPTX() || Triple.isAMDGPU()) +CmdArgs.push_back("-foffload-lto"); + else +CmdArgs.push_back("-flto"); omarahmed wrote: I just noticed the command that you use is not using `-S` option. I tried without it, and it is working normally, but `-S` option seems to introduce this behaviour, instead of generating ptx readable code, it generates the input llvm IR optimized. https://github.com/llvm/llvm-project/pull/125243 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Pass -offload-lto instead of -lto for cuda/hip kernels (PR #125243)
https://github.com/omarahmed closed https://github.com/llvm/llvm-project/pull/125243 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits