llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) <details> <summary>Changes</summary> Summary: The https://github.com/llvm/llvm-project/pull/128509 patch introduced `--flto-partitions`. This was marked as a HIP only argument, and was also spelled and handled incorrectly for an `-f` option. This patch makes the handling generic for `ld.lld` consumers. This also fixes some issues with emitting the flags being put after the default arguments, preventing users from overriding them. Also, forwards things properly for the new driver so we can test this. --- Full diff: https://github.com/llvm/llvm-project/pull/133283.diff 9 Files Affected: - (modified) clang/include/clang/Driver/Options.td (+2-2) - (modified) clang/lib/Driver/ToolChains/AMDGPU.cpp (+4-27) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+3-1) - (modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+11) - (modified) clang/lib/Driver/ToolChains/HIPAMD.cpp (+4-2) - (modified) clang/test/Driver/amdgpu-toolchain.c (+3-3) - (modified) clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip (+6-6) - (modified) clang/test/Driver/hip-toolchain-rdc-static-lib.hip (+2-2) - (modified) clang/test/Driver/hip-toolchain-rdc.hip (+15-11) ``````````diff diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a7fcb160d3867..b43c53f5f419d 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1392,8 +1392,6 @@ def fhip_emit_relocatable : Flag<["-"], "fhip-emit-relocatable">, HelpText<"Compile HIP source to relocatable">; def fno_hip_emit_relocatable : Flag<["-"], "fno-hip-emit-relocatable">, HelpText<"Do not override toolchain to compile HIP source to relocatable">; -def flto_partitions_EQ : Joined<["--"], "flto-partitions=">, Group<hip_Group>, - HelpText<"Number of partitions to use for parallel full LTO codegen. Use 1 to disable partitioning.">; } // Clang specific/exclusive options for OpenACC. @@ -3043,6 +3041,8 @@ defm fat_lto_objects : BoolFOption<"fat-lto-objects", PosFlag<SetTrue, [], [ClangOption, CC1Option], "Enable">, NegFlag<SetFalse, [], [ClangOption, CC1Option], "Disable">, BothFlags<[], [ClangOption, CC1Option], " fat LTO object support">>; +def flto_partitions_EQ : Joined<["-"], "flto-partitions=">, Group<f_Group>, + HelpText<"Number of partitions to use for parallel full LTO codegen, ld.lld only.">; def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">, Group<f_Group>, Visibility<[ClangOption, CC1Option, CLOption]>, HelpText<"Set the maximum number of entries to print in a macro expansion backtrace (0 = no limit)">, diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 1c5bb08568801..72c03fb3154e2 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -625,22 +625,19 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-shared"); } - addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs); - Args.AddAllArgs(CmdArgs, options::OPT_L); - getToolChain().AddFilePathLibArgs(Args, CmdArgs); - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); if (C.getDriver().isUsingLTO()) { const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin); addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], ThinLTO); - - if (!ThinLTO && JA.getOffloadingDeviceKind() == Action::OFK_HIP) - addFullLTOPartitionOption(C.getDriver(), Args, CmdArgs); } else if (Args.hasArg(options::OPT_mcpu_EQ)) { CmdArgs.push_back(Args.MakeArgString( "-plugin-opt=mcpu=" + getProcessorFromTargetID(getToolChain().getTriple(), Args.getLastArgValue(options::OPT_mcpu_EQ)))); } + addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs); + getToolChain().AddFilePathLibArgs(Args, CmdArgs); + Args.AddAllArgs(CmdArgs, options::OPT_L); + AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); // Always pass the target-id features to the LTO job. std::vector<StringRef> Features; @@ -711,26 +708,6 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, options::OPT_m_amdgpu_Features_Group); } -static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) { - int Value = 0; - StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8"); - if (A.getAsInteger(10, Value) || (Value < 1)) { - Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ); - D.Diag(diag::err_drv_invalid_int_value) - << Arg->getAsString(Args) << Arg->getValue(); - return 1; - } - - return Value; -} - -void amdgpu::addFullLTOPartitionOption(const Driver &D, - const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) { - CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" + - Twine(getFullLTOPartitions(D, Args)))); -} - /// AMDGPU Toolchain AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index e1e8f57dd6455..40ecc0aee48b3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9217,6 +9217,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, OPT_load, OPT_fno_lto, OPT_flto, + OPT_flto_partitions_EQ, OPT_flto_EQ}; const llvm::DenseSet<unsigned> LinkerOptions{OPT_mllvm, OPT_Zlinker_input}; auto ShouldForward = [&](const llvm::DenseSet<unsigned> &Set, Arg *A) { @@ -9226,7 +9227,8 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, }; ArgStringList CmdArgs; - for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) { + for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP, + Action::OFK_HIP, Action::OFK_SYCL}) { auto TCRange = C.getOffloadToolChains(Kind); for (auto &I : llvm::make_range(TCRange)) { const ToolChain *TC = I.second; diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 1945c95458c54..41cfa3d2e4f8c 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -899,6 +899,17 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, // files if (IsFatLTO) CmdArgs.push_back("--fat-lto-objects"); + + if (Args.hasArg(options::OPT_flto_partitions_EQ)) { + int Value = 0; + StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8"); + if (A.getAsInteger(10, Value) || (Value < 1)) { + Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ); + D.Diag(diag::err_drv_invalid_int_value) + << Arg->getAsString(Args) << Arg->getValue(); + } + CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" + A)); + } } const char *PluginOptPrefix = IsOSAIX ? "-bplugin_opt:" : "-plugin-opt="; diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index dc3300b00f9ff..abb83701759ce 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -116,8 +116,6 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, addLinkerCompressDebugSectionsOption(TC, Args, LldArgs); - amdgpu::addFullLTOPartitionOption(D, Args, LldArgs); - // Given that host and device linking happen in separate processes, the device // linker doesn't always have the visibility as to which device symbols are // needed by a program, especially for the device symbol dependencies that are @@ -294,6 +292,10 @@ HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, checkTargetID(*DAL); } + if (!Args.hasArg(options::OPT_flto_partitions_EQ)) + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_flto_partitions_EQ), + "8"); + return DAL; } diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c index 20656f1fadeb7..2a35c13746a0e 100644 --- a/clang/test/Driver/amdgpu-toolchain.c +++ b/clang/test/Driver/amdgpu-toolchain.c @@ -20,12 +20,12 @@ // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \ // RUN: -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s -// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions" -// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack" +// LTO: clang{{.*}}"-flto=full"{{.*}}"-fconvergent-functions" +// LTO: ld.lld{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"{{.*}} // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \ // RUN: -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s -// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack" +// MCPU: ld.lld{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"{{.*}} // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \ // RUN: -fuse-ld=ld %s 2>&1 | FileCheck -check-prefixes=LD %s diff --git a/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip b/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip index e345bd3f5be6b..4439547ea8ad9 100644 --- a/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip +++ b/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip @@ -1,5 +1,5 @@ // RUN: %clang -### --target=x86_64-linux-gnu \ -// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=42 \ +// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=42 \ // RUN: --no-offload-new-driver --emit-static-lib -nogpulib \ // RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ @@ -10,26 +10,26 @@ // FIXED-PARTS-NOT: ".*opt" // FIXED-PARTS-NOT: ".*llc" // FIXED-PARTS: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" -// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803" // FIXED-PARTS-SAME: "--lto-partitions=42" +// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803" // FIXED-PARTS-SAME: "-o" "{{.*out}}" "{{.*bc}}" // RUN: not %clang -### --target=x86_64-linux-gnu \ -// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=a \ +// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=a \ // RUN: --no-offload-new-driver --emit-static-lib -nogpulib \ // RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV0 -// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a' +// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a' // RUN: not %clang -### --target=x86_64-linux-gnu \ -// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=0 \ +// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=0 \ // RUN: --no-offload-new-driver --emit-static-lib -nogpulib \ // RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV1 -// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0' +// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0' diff --git a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip index 6f38a06f7cf31..05d276ba57bda 100644 --- a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip +++ b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip @@ -48,8 +48,8 @@ // CHECK-NOT: ".*opt" // CHECK-NOT: ".*llc" // CHECK: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" -// CHECK-SAME: "-plugin-opt=mcpu=gfx803" // CHECK-SAME: "--lto-partitions={{[0-9]+}}" +// CHECK-SAME: "-plugin-opt=mcpu=gfx803" // CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[A_BC1]] [[B_BC1]] // generate image for device side path on gfx900 @@ -77,8 +77,8 @@ // CHECK-NOT: ".*opt" // CHECK-NOT: ".*llc" // CHECK: [[LLD]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" -// CHECK-SAME: "-plugin-opt=mcpu=gfx900" // CHECK-SAME: "--lto-partitions={{[0-9]+}}" +// CHECK-SAME: "-plugin-opt=mcpu=gfx900" // CHECK-SAME: "--whole-archive" // CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[A_BC2]] [[B_BC2]] // CHECK-SAME: "--no-whole-archive" diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip index d5119e71a3569..204400aeaa15d 100644 --- a/clang/test/Driver/hip-toolchain-rdc.hip +++ b/clang/test/Driver/hip-toolchain-rdc.hip @@ -146,8 +146,8 @@ // CHECK-NOT: ".*opt" // CHECK-NOT: ".*llc" // CHECK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" -// CHECK-SAME: "-plugin-opt=mcpu=gfx900" // CHECK-SAME: "--lto-partitions={{[0-9]+}}" +// CHECK-SAME: "-plugin-opt=mcpu=gfx900" // CHECK-SAME: "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]] // combine images generated into hip fat binary object @@ -162,20 +162,24 @@ // LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]] // MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]] -// Check --flto-partitions +// Check -flto-partitions -// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \ +// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \ // RUN: -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT %s // LTO_DEFAULT: lld{{.*}}"--lto-partitions=8" -// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \ -// RUN: -L. -foffload-lto --flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s +// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --offload-new-driver \ +// RUN: -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT_NEW %s +// LTO_DEFAULT_NEW: clang-linker-wrapper{{.*}}"--device-compiler=amdgcn-amd-amdhsa=-flto-partitions=8" + +// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \ +// RUN: -L. -foffload-lto -flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s // LTO_PARTS: lld{{.*}}"--lto-partitions=42" -// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \ -// RUN: -L. -foffload-lto --flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s -// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a' +// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \ +// RUN: -L. -foffload-lto -flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s +// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a' -// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \ -// RUN: -L. -foffload-lto --flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s -// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0' +// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \ +// RUN: -L. -foffload-lto -flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s +// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0' `````````` </details> https://github.com/llvm/llvm-project/pull/133283 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits