[clang] de11de3 - [InstrProf] Use i32 for GEP index from lowering llvm.instrprof.increment
Author: Ellis Hoag Date: 2021-11-19T15:45:14-08:00 New Revision: de11de308b6480fc35d901c7104f46918674418c URL: https://github.com/llvm/llvm-project/commit/de11de308b6480fc35d901c7104f46918674418c DIFF: https://github.com/llvm/llvm-project/commit/de11de308b6480fc35d901c7104f46918674418c.diff LOG: [InstrProf] Use i32 for GEP index from lowering llvm.instrprof.increment The `llvm.instrprof.increment` intrinsic uses `i32` for the index. We should use this same type for the index into the GEP instructions. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D114268 Added: Modified: clang/test/CodeGen/profile-filter.c clang/test/Profile/branch-logical-mixed.cpp clang/test/Profile/c-captured.c clang/test/Profile/c-general.c clang/test/Profile/c-ternary.c clang/test/Profile/cxx-class.cpp clang/test/Profile/cxx-lambda.cpp clang/test/Profile/cxx-rangefor.cpp clang/test/Profile/cxx-stmt-initializers.cpp clang/test/Profile/cxx-templates.cpp clang/test/Profile/cxx-throws.cpp clang/test/Profile/objc-general.m llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp llvm/test/Instrumentation/InstrProfiling/atomic-updates.ll llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll llvm/test/Transforms/PGOProfile/counter_promo_exit_catchswitch.ll llvm/test/Transforms/PGOProfile/instr_entry_bb.ll Removed: diff --git a/clang/test/CodeGen/profile-filter.c b/clang/test/CodeGen/profile-filter.c index d39097076fc9c..5d71657f8f2d7 100644 --- a/clang/test/CodeGen/profile-filter.c +++ b/clang/test/CodeGen/profile-filter.c @@ -37,11 +37,11 @@ unsigned i; // EXCLUDE: noprofile // EXCLUDE: @test1 unsigned test1() { - // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) - // FUNC: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) - // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) - // 
SECTION-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) - // EXCLUDE-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) + // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i32 0, i32 0) + // FUNC: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i32 0, i32 0) + // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i32 0, i32 0) + // SECTION-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i32 0, i32 0) + // EXCLUDE-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i32 0, i32 0) return i + 1; } @@ -56,10 +56,10 @@ unsigned test1() { // EXCLUDE-NOT: noprofile // EXCLUDE: @test2 unsigned test2() { - // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) - // FUNC-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) - // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) - // SECTION: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) - // EXCLUDE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) + // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i32 0, i32 0) + // FUNC-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i32 0, i32 0) + // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i32 0, i32 0) + // SECTION: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i32 0, i32 0) + // EXCLUDE: 
%pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i32 0, i32 0) return i - 1; } diff --git a/clang/test/Profile/branch-logical-mixed.cpp b/clang/test/Profile/branch-logical-mixed.cpp index 04b51d81d13bd..cdcee76c3d0cf 100644 --- a/clang/test/Profile/branch-logical-mixed.cpp +++ b/clang/test/Profile/branch-logical-mixed.cpp @@ -23,44 +23,44 @@ bool func() { bool bf5 = false; bool a = bt0 && - bf0 && // CHECK: store {{.*}} @[[FUNC]], i64 0, i64 10 - bt1 && // CHECK: store {{.*}} @[[FUNC]], i64 0, i64 8 - bf1 && // CHECK: store {{.*}} @[[FUNC]], i64 0, i64 6 - bt2 && // CHECK: store {{.*}} @[[FUNC]], i64
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90304 >From 966922b9921669d48eb750c36ce3c9b792ba8161 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 12:58:54 -0700 Subject: [PATCH] [CGData] Clang Options --- clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/CommonArgs.cpp | 27 + clang/lib/Driver/ToolChains/Darwin.cpp | 46 ++ clang/test/Driver/codegen-data.c | 42 4 files changed, 127 insertions(+) create mode 100644 clang/test/Driver/codegen-data.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f78032255f036f..b400af5d99c654 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1894,6 +1894,18 @@ def fprofile_selected_function_group : Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, MarshallingInfoInt>; +def fcodegen_data_generate : Joined<["-"], "fcodegen-data-generate">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Emit codegen data into object file. LLD for MachO (for now) merges them into default.cgdata">; +def fcodegen_data_generate_EQ : Joined<["-"], "fcodegen-data-generate=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Emit codegen data into object file. 
LLD for MachO (for now) merges them into /default.cgdata">; +def fcodegen_data_use : Joined<["-"], "fcodegen-data-use">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Use codegen data read from default.cgdata to optimize the binary">; +def fcodegen_data_use_EQ : Joined<["-"], "fcodegen-data-use=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Use codegen data read from /default.cgdata to optimize the binary">; def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption]>, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2ce6779f4b43e3..5fa502d64c0300 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2753,6 +2753,33 @@ void tools::addMachineOutlinerArgs(const Driver &D, addArg(Twine("-enable-machine-outliner=never")); } } + + auto *CodeGenDataGenArg = + Args.getLastArg(options::OPT_fcodegen_data_generate, + options::OPT_fcodegen_data_generate_EQ); + auto *CodeGenDataUseArg = Args.getLastArg(options::OPT_fcodegen_data_use, +options::OPT_fcodegen_data_use_EQ); + + // We only allow one of them to be specified. + if (CodeGenDataGenArg && CodeGenDataUseArg) +D.Diag(diag::err_drv_argument_not_allowed_with) +<< CodeGenDataGenArg->getAsString(Args) +<< CodeGenDataUseArg->getAsString(Args); + + // For codegen data gen, the output file is passed to the linker + // while a boolean flag is passed to the LLVM backend. + if (CodeGenDataGenArg) +addArg(Twine("-codegen-data-generate")); + + // For codegen data use, the input file is passed to the LLVM backend. + if (CodeGenDataUseArg) { +SmallString<128> Path(CodeGenDataUseArg->getNumValues() == 0 + ? 
"" + : CodeGenDataUseArg->getValue()); +if (Path.empty() || llvm::sys::fs::is_directory(Path)) + llvm::sys::path::append(Path, "default.cgdata"); +addArg(Twine("-codegen-data-use-path=" + Path.str())); + } } void tools::addOpenMPDeviceRTL(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 5e7f9290e2009d..9e72e280109640 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -476,6 +476,19 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back(Args.MakeArgString(Twine("--cs-profile-path=") + Path)); } + +auto *CodeGenDataGenArg = +Args.getLastArg(options::OPT_fcodegen_data_generate, +options::OPT_fcodegen_data_generate_EQ); +if (CodeGenDataGenArg) { + SmallString<128> Path(CodeGenDataGenArg->getNumValues() == 0 +? "" +: CodeGenDataGenArg->getValue()); + if (Path.empty() || llvm::sys::fs::is_directory(Path)) +llvm::sys::path::append(Path, "default.cgdata"); + CmdArgs.push_back( + Args.MakeArgString(Twine("--codegen-data-generate-path=") + Path)); +} } } @@ -633,6 +646,39 @@ void darwin::Linker::ConstructJob(Compilation
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com ready_for_review https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] [ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90934 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] Clang2 (PR #105453)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/105453 >From a0a5bea46b69f2b40dd7fa737efdd51281a39429 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Wed, 24 Apr 2024 11:26:23 -0700 Subject: [PATCH 1/4] [CGData][MachineOutliner] Global Outlining2 This commit introduces support for outlining functions across modules using codegen data generated from previous codegen. The codegen data currently manages the outlined hash tree, which records outlining instances that occurred locally in the past. The machine outliner now operates in one of three modes: 1. CGDataMode::None: This is the default outliner mode that uses the suffix tree to identify (local) outlining candidates within a module. This mode is also used by (full)LTO to maintain optimal behavior with the combined module. 2. CGDataMode::Write (`codegen-data-generate`): This mode is identical to the default mode, but it also publishes the stable hash sequences of instructions in the outlined functions into a local outlined hash tree. It then encodes this into the `__llvm_outline` section, which will be dead-stripped at link time. 3. CGDataMode::Read (`codegen-data-use-path={.cgdata}`): This mode reads a codegen data file (.cgdata) and initializes a global outlined hash tree. This tree is used to generate global outlining candidates. Note that the codegen data file has been post-processed with the raw `__llvm_outline` sections from all native objects using the `llvm-cgdata` tool (or a linker, `LLD`, or a new ThinLTO pipeline later). 
--- llvm/include/llvm/ADT/StableHashing.h | 6 + llvm/include/llvm/CodeGen/MachineOutliner.h | 40 ++- llvm/lib/CGData/CodeGenData.cpp | 26 +- llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/MachineOutliner.cpp | 261 +- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 + .../CodeGen/AArch64/cgdata-global-hash.ll | 40 +++ .../CodeGen/AArch64/cgdata-outlined-name.ll | 41 +++ .../AArch64/cgdata-read-double-outline.ll | 57 .../AArch64/cgdata-read-lto-outline.ll| 96 +++ .../CodeGen/AArch64/cgdata-read-priority.ll | 68 + .../cgdata-read-single-outline-suffix.ll | 100 +++ .../AArch64/cgdata-read-single-outline.ll | 42 +++ .../CodeGen/AArch64/cgdata-write-outline.ll | 51 llvm/test/CodeGen/RISCV/O3-pipeline.ll| 1 + llvm/unittests/MIR/MachineStableHashTest.cpp | 70 + 16 files changed, 897 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/cgdata-global-hash.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-outlined-name.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-double-outline.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-lto-outline.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-priority.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-single-outline-suffix.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-single-outline.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-write-outline.ll diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h index 7852199f8b0a00..b220a0ed1f9131 100644 --- a/llvm/include/llvm/ADT/StableHashing.h +++ b/llvm/include/llvm/ADT/StableHashing.h @@ -53,6 +53,12 @@ inline stable_hash stable_hash_combine(stable_hash A, stable_hash B, // Removes suffixes introduced by LLVM from the name to enhance stability and // maintain closeness to the original name across different builds. inline StringRef get_stable_name(StringRef Name) { + // Return the part after ".content." that represents contents. 
+ auto [P0, S0] = Name.rsplit(".content."); + if (!S0.empty()) +return S0; + + // Ignore these suffixes. auto [P1, S1] = Name.rsplit(".llvm."); auto [P2, S2] = P1.rsplit(".__uniq."); return P2; diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index eaba6c9b18f2bb..fbb958ccf6488e 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -18,6 +18,7 @@ #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineStableHash.h" #include namespace llvm { @@ -234,11 +235,11 @@ struct OutlinedFunction { unsigned FrameConstructionID = 0; /// Return the number of candidates for this \p OutlinedFunction. - unsigned getOccurrenceCount() const { return Candidates.size(); } + virtual unsigned getOccurrenceCount() const { return Candidates.size(); } /// Return the number of bytes it would take to outline this /// function. - unsigned getOutliningCost() const { + virtual unsigned getOutliningCost() const { unsigned CallOverhead = 0; for (const Candidate &C : Candidates) CallOverhead += C.getCallOverhead(); @@ -272,7 +
[clang] [lld] [llvm] Thin2 (PR #106602)
https://github.com/kyulee-com created https://github.com/llvm/llvm-project/pull/106602 None >From a0a5bea46b69f2b40dd7fa737efdd51281a39429 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Wed, 24 Apr 2024 11:26:23 -0700 Subject: [PATCH 1/6] [CGData][MachineOutliner] Global Outlining2 This commit introduces support for outlining functions across modules using codegen data generated from previous codegen. The codegen data currently manages the outlined hash tree, which records outlining instances that occurred locally in the past. The machine outliner now operates in one of three modes: 1. CGDataMode::None: This is the default outliner mode that uses the suffix tree to identify (local) outlining candidates within a module. This mode is also used by (full)LTO to maintain optimal behavior with the combined module. 2. CGDataMode::Write (`codegen-data-generate`): This mode is identical to the default mode, but it also publishes the stable hash sequences of instructions in the outlined functions into a local outlined hash tree. It then encodes this into the `__llvm_outline` section, which will be dead-stripped at link time. 3. CGDataMode::Read (`codegen-data-use-path={.cgdata}`): This mode reads a codegen data file (.cgdata) and initializes a global outlined hash tree. This tree is used to generate global outlining candidates. Note that the codegen data file has been post-processed with the raw `__llvm_outline` sections from all native objects using the `llvm-cgdata` tool (or a linker, `LLD`, or a new ThinLTO pipeline later). 
--- llvm/include/llvm/ADT/StableHashing.h | 6 + llvm/include/llvm/CodeGen/MachineOutliner.h | 40 ++- llvm/lib/CGData/CodeGenData.cpp | 26 +- llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/MachineOutliner.cpp | 261 +- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 + .../CodeGen/AArch64/cgdata-global-hash.ll | 40 +++ .../CodeGen/AArch64/cgdata-outlined-name.ll | 41 +++ .../AArch64/cgdata-read-double-outline.ll | 57 .../AArch64/cgdata-read-lto-outline.ll| 96 +++ .../CodeGen/AArch64/cgdata-read-priority.ll | 68 + .../cgdata-read-single-outline-suffix.ll | 100 +++ .../AArch64/cgdata-read-single-outline.ll | 42 +++ .../CodeGen/AArch64/cgdata-write-outline.ll | 51 llvm/test/CodeGen/RISCV/O3-pipeline.ll| 1 + llvm/unittests/MIR/MachineStableHashTest.cpp | 70 + 16 files changed, 897 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/cgdata-global-hash.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-outlined-name.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-double-outline.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-lto-outline.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-priority.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-single-outline-suffix.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-single-outline.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-write-outline.ll diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h index 7852199f8b0a00..b220a0ed1f9131 100644 --- a/llvm/include/llvm/ADT/StableHashing.h +++ b/llvm/include/llvm/ADT/StableHashing.h @@ -53,6 +53,12 @@ inline stable_hash stable_hash_combine(stable_hash A, stable_hash B, // Removes suffixes introduced by LLVM from the name to enhance stability and // maintain closeness to the original name across different builds. inline StringRef get_stable_name(StringRef Name) { + // Return the part after ".content." that represents contents. 
+ auto [P0, S0] = Name.rsplit(".content."); + if (!S0.empty()) +return S0; + + // Ignore these suffixes. auto [P1, S1] = Name.rsplit(".llvm."); auto [P2, S2] = P1.rsplit(".__uniq."); return P2; diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index eaba6c9b18f2bb..fbb958ccf6488e 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -18,6 +18,7 @@ #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineStableHash.h" #include namespace llvm { @@ -234,11 +235,11 @@ struct OutlinedFunction { unsigned FrameConstructionID = 0; /// Return the number of candidates for this \p OutlinedFunction. - unsigned getOccurrenceCount() const { return Candidates.size(); } + virtual unsigned getOccurrenceCount() const { return Candidates.size(); } /// Return the number of bytes it would take to outline this /// function. - unsigned getOutliningCost() const { + virtual unsigned getOutliningCost() const { unsigned CallOverhead = 0; for (const Candidate &C : Candidates) CallOverhead += C.getCallOverhead(); @@ -2
[clang] [lld] [llvm] Thin2 (PR #106602)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/106602 >From 561eb1810f04f373410ba2f37f846eafe46515dc Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Wed, 24 Apr 2024 11:26:23 -0700 Subject: [PATCH 1/6] [CGData][MachineOutliner] Global Outlining2 This commit introduces support for outlining functions across modules using codegen data generated from previous codegen. The codegen data currently manages the outlined hash tree, which records outlining instances that occurred locally in the past. The machine outliner now operates in one of three modes: 1. CGDataMode::None: This is the default outliner mode that uses the suffix tree to identify (local) outlining candidates within a module. This mode is also used by (full)LTO to maintain optimal behavior with the combined module. 2. CGDataMode::Write (`codegen-data-generate`): This mode is identical to the default mode, but it also publishes the stable hash sequences of instructions in the outlined functions into a local outlined hash tree. It then encodes this into the `__llvm_outline` section, which will be dead-stripped at link time. 3. CGDataMode::Read (`codegen-data-use-path={.cgdata}`): This mode reads a codegen data file (.cgdata) and initializes a global outlined hash tree. This tree is used to generate global outlining candidates. Note that the codegen data file has been post-processed with the raw `__llvm_outline` sections from all native objects using the `llvm-cgdata` tool (or a linker, `LLD`, or a new ThinLTO pipeline later). 
--- llvm/include/llvm/ADT/StableHashing.h | 6 + llvm/include/llvm/CodeGen/MachineOutliner.h | 40 ++- llvm/lib/CGData/CodeGenData.cpp | 26 +- llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/MachineOutliner.cpp | 261 +- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 + .../CodeGen/AArch64/cgdata-global-hash.ll | 40 +++ .../CodeGen/AArch64/cgdata-outlined-name.ll | 41 +++ .../AArch64/cgdata-read-double-outline.ll | 57 .../AArch64/cgdata-read-lto-outline.ll| 96 +++ .../CodeGen/AArch64/cgdata-read-priority.ll | 68 + .../cgdata-read-single-outline-suffix.ll | 100 +++ .../AArch64/cgdata-read-single-outline.ll | 42 +++ .../CodeGen/AArch64/cgdata-write-outline.ll | 51 llvm/test/CodeGen/RISCV/O3-pipeline.ll| 1 + llvm/unittests/MIR/MachineStableHashTest.cpp | 70 + 16 files changed, 897 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/cgdata-global-hash.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-outlined-name.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-double-outline.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-lto-outline.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-priority.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-single-outline-suffix.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-single-outline.ll create mode 100644 llvm/test/CodeGen/AArch64/cgdata-write-outline.ll diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h index 7852199f8b0a00..b220a0ed1f9131 100644 --- a/llvm/include/llvm/ADT/StableHashing.h +++ b/llvm/include/llvm/ADT/StableHashing.h @@ -53,6 +53,12 @@ inline stable_hash stable_hash_combine(stable_hash A, stable_hash B, // Removes suffixes introduced by LLVM from the name to enhance stability and // maintain closeness to the original name across different builds. inline StringRef get_stable_name(StringRef Name) { + // Return the part after ".content." that represents contents. 
+ auto [P0, S0] = Name.rsplit(".content."); + if (!S0.empty()) +return S0; + + // Ignore these suffixes. auto [P1, S1] = Name.rsplit(".llvm."); auto [P2, S2] = P1.rsplit(".__uniq."); return P2; diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index eaba6c9b18f2bb..fbb958ccf6488e 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -18,6 +18,7 @@ #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineStableHash.h" #include namespace llvm { @@ -234,11 +235,11 @@ struct OutlinedFunction { unsigned FrameConstructionID = 0; /// Return the number of candidates for this \p OutlinedFunction. - unsigned getOccurrenceCount() const { return Candidates.size(); } + virtual unsigned getOccurrenceCount() const { return Candidates.size(); } /// Return the number of bytes it would take to outline this /// function. - unsigned getOutliningCost() const { + virtual unsigned getOutliningCost() const { unsigned CallOverhead = 0; for (const Candidate &C : Candidates) CallOverhead += C.getCallOverhead(); @@ -272,7 +
[clang] [lld] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] [ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90934 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] [ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90934 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Run ObjCContractPass in Distributed Thin-LTO Pipeline (PR #92331)
https://github.com/kyulee-com approved this pull request. I think `ObjCARCContractPass` is ideally suited for MachO rather than ELF. There might be a consideration to add it conditionally, however, this could be excessive. Like the (full)LTO pass -- https://github.com/llvm/llvm-project/blob/main/llvm/lib/LTO/LTOCodeGenerator.cpp#L141, I think this approach results in a no-op for ELF and it should be okay. https://github.com/llvm/llvm-project/pull/92331 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [lld] [llvm] Run ObjCContractPass in Distributed Thin-LTO Pipeline (PR #92331)
kyulee-com wrote: Because you now add it to `codegen` unconditionally, do you also need to delete https://github.com/llvm/llvm-project/blob/main/llvm/lib/LTO/LTOCodeGenerator.cpp#L141? https://github.com/llvm/llvm-project/pull/92331 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] f1c9e7b - [ObjC Availability] Add missing const to getVersion function of ObjCAvailabilityCheckExpr class
Author: Chaoshuai Lu Date: 2022-01-24T14:52:57-08:00 New Revision: f1c9e7bdc921cec0cc3f61c19c4ac4a7f1bd8525 URL: https://github.com/llvm/llvm-project/commit/f1c9e7bdc921cec0cc3f61c19c4ac4a7f1bd8525 DIFF: https://github.com/llvm/llvm-project/commit/f1c9e7bdc921cec0cc3f61c19c4ac4a7f1bd8525.diff LOG: [ObjC Availability] Add missing const to getVersion function of ObjCAvailabilityCheckExpr class Add missing const to `getVersion` function of `ObjCAvailabilityCheckExpr` class. This feels like a bug on the original change D22171. We cannot really call this function from a const object pointer because the function is not marked as const. This diff adds the missing const specifier to fix the issue. Reviewed By: manmanren Differential Revision: https://reviews.llvm.org/D112119 Added: Modified: clang/include/clang/AST/ExprObjC.h Removed: diff --git a/clang/include/clang/AST/ExprObjC.h b/clang/include/clang/AST/ExprObjC.h index b0f057dbaa02f..3b7ad8662ad95 100644 --- a/clang/include/clang/AST/ExprObjC.h +++ b/clang/include/clang/AST/ExprObjC.h @@ -1706,7 +1706,7 @@ class ObjCAvailabilityCheckExpr : public Expr { /// This may be '*', in which case this should fold to true. bool hasVersion() const { return !VersionToCheck.empty(); } - VersionTuple getVersion() { return VersionToCheck; } + VersionTuple getVersion() const { return VersionToCheck; } child_range children() { return child_range(child_iterator(), child_iterator()); ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 47b239e - [DIBuilder] Do not replace empty enum types
Author: Ellis Hoag Date: 2021-08-30T12:33:03-07:00 New Revision: 47b239eb5a17065d13c317600c46e56ffe2d6c75 URL: https://github.com/llvm/llvm-project/commit/47b239eb5a17065d13c317600c46e56ffe2d6c75 DIFF: https://github.com/llvm/llvm-project/commit/47b239eb5a17065d13c317600c46e56ffe2d6c75.diff LOG: [DIBuilder] Do not replace empty enum types It looks like this array was missed in 4276d4a8d08b7640eb57cabf6988a5cf65b228b6 Fixed tests that expected `elements` to be empty or depended on the order of the empty DINode. Reviewed By: aprantl Differential Revision: https://reviews.llvm.org/D107024 Added: Modified: clang/test/CodeGen/debug-info-codeview-heapallocsite.c clang/test/CodeGen/debug-info-macro.c clang/test/CodeGenCXX/debug-info-codeview-var-templates.cpp clang/test/CodeGenCXX/debug-info-cxx1y.cpp clang/test/CodeGenCXX/debug-info-template.cpp clang/test/CodeGenCXX/debug-info-var-template-partial-spec.cpp clang/test/CodeGenCoroutines/coro-dwarf.cpp llvm/lib/IR/DIBuilder.cpp llvm/test/CodeGen/AArch64/GlobalISel/constant-mir-debugify.mir llvm/test/CodeGen/AArch64/GlobalISel/phi-mir-debugify.mir llvm/test/DebugInfo/debugify.ll Removed: diff --git a/clang/test/CodeGen/debug-info-codeview-heapallocsite.c b/clang/test/CodeGen/debug-info-codeview-heapallocsite.c index 25c102b1c37dd..3ddce910aba13 100644 --- a/clang/test/CodeGen/debug-info-codeview-heapallocsite.c +++ b/clang/test/CodeGen/debug-info-codeview-heapallocsite.c @@ -19,8 +19,8 @@ void call_alloc() { // CHECK: call i8* {{.*}}@alloc_void{{.*}} !heapallocsite [[DBG2]] // CHECK: call i8* {{.*}}@alloc_void{{.*}} !heapallocsite [[DBG3:!.*]] -// CHECK: [[DBG1]] = !{} // CHECK: [[DBG2]] = !DICompositeType(tag: DW_TAG_structure_type, // CHECK-SAME: name: "Foo" // CHECK: [[DBG3]] = !DICompositeType(tag: DW_TAG_structure_type, // CHECK-SAME: name: "Bar" +// CHECK: [[DBG1]] = !{} diff --git a/clang/test/CodeGen/debug-info-macro.c b/clang/test/CodeGen/debug-info-macro.c index 6294d43753f0e..9d0464102c108 100644 --- 
a/clang/test/CodeGen/debug-info-macro.c +++ b/clang/test/CodeGen/debug-info-macro.c @@ -25,7 +25,6 @@ // NO_MACRO-NOT: DIMacroFile // CHECK: !DICompileUnit({{.*}} macros: [[Macros:![0-9]+]] -// CHECK: [[EmptyMD:![0-9]+]] = !{} // NO_PCH: [[Macros]] = !{[[MainMacroFile:![0-9]+]], [[BuiltinMacro:![0-9]+]], {{.*}}, [[DefineC1:![0-9]+]], [[DefineA:![0-9]+]], [[UndefC1:![0-9]+]]} // PCH:[[Macros]] = !{[[MainMacroFile:![0-9]+]], [[DefineC1:![0-9]+]], [[DefineA:![0-9]+]], [[UndefC1:![0-9]+]]} diff --git a/clang/test/CodeGenCXX/debug-info-codeview-var-templates.cpp b/clang/test/CodeGenCXX/debug-info-codeview-var-templates.cpp index 0470c133688cb..dec4c01444afd 100644 --- a/clang/test/CodeGenCXX/debug-info-codeview-var-templates.cpp +++ b/clang/test/CodeGenCXX/debug-info-codeview-var-templates.cpp @@ -10,10 +10,7 @@ struct TestImplicit { int instantiate_test1() { return TestImplicit::size_var + TestImplicit::size_var; } TestImplicit gv1; -// CHECK: ![[empty:[0-9]+]] = !{} - // CHECK: ![[A:[^ ]*]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TestImplicit", -// CHECK-SAME: elements: ![[empty]] template bool vtpl; struct TestSpecialization { @@ -22,7 +19,6 @@ struct TestSpecialization { } gv2; // CHECK: ![[A:[^ ]*]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TestSpecialization", -// CHECK-SAME: elements: ![[empty]] template bool a; template struct b; @@ -32,4 +28,3 @@ struct TestPartial { } c; // CHECK: ![[A:[^ ]*]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TestPartial", -// CHECK-SAME: elements: ![[empty]] diff --git a/clang/test/CodeGenCXX/debug-info-cxx1y.cpp b/clang/test/CodeGenCXX/debug-info-cxx1y.cpp index f1298b1d858c2..6ec55626033d6 100644 --- a/clang/test/CodeGenCXX/debug-info-cxx1y.cpp +++ b/clang/test/CodeGenCXX/debug-info-cxx1y.cpp @@ -1,7 +1,6 @@ // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm-only -std=c++14 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s // CHECK: imports: 
[[IMPS:![0-9]*]] -// CHECK: [[EMPTY:![0-9]*]] = !{} // CHECK: [[IMPS]] = !{[[IMP:![0-9]*]]} // CHECK: [[IMP]] = !DIImportedEntity( @@ -12,6 +11,7 @@ // CHECK: [[TYPE_LIST]] = !{[[INT:![0-9]*]]} // CHECK: [[INT]] = !DIBasicType(name: "int" +// CHECK: [[EMPTY:![0-9]*]] = !{} // CHECK: [[FOO:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", // CHECK-SAME: elements: [[EMPTY]] diff --git a/clang/test/CodeGenCXX/debug-info-template.cpp b/clang/test/CodeGenCXX/debug-info-template.cpp index 2ce0166590aa1..7e8ccbcc5e5d7 100644 --- a/clang/test/CodeGenCXX/debug-info-template.cpp +++ b/clang/test/CodeGenCXX/debug-info-template.cpp @@ -5,7 +5,6 @@ // CHECK: @nn = dso_local global %s
[clang] [lld] [llvm] Clang2 (PR #105453)
https://github.com/kyulee-com created https://github.com/llvm/llvm-project/pull/105453 None >From 3ed59135c01fcc4d5b3ffa172575bbc74bbb0fb8 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Wed, 24 Apr 2024 09:40:34 -0700 Subject: [PATCH 1/4] [MachineOutliner][NFC] Refactor --- llvm/include/llvm/CodeGen/MachineOutliner.h | 5 +- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 12 - llvm/lib/CodeGen/MachineOutliner.cpp | 55 +++- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 7 +-- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 3 +- 5 files changed, 48 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index eaba6c9b18f2bb..84937a8b563ac0 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -234,11 +234,11 @@ struct OutlinedFunction { unsigned FrameConstructionID = 0; /// Return the number of candidates for this \p OutlinedFunction. - unsigned getOccurrenceCount() const { return Candidates.size(); } + virtual unsigned getOccurrenceCount() const { return Candidates.size(); } /// Return the number of bytes it would take to outline this /// function. - unsigned getOutliningCost() const { + virtual unsigned getOutliningCost() const { unsigned CallOverhead = 0; for (const Candidate &C : Candidates) CallOverhead += C.getCallOverhead(); @@ -272,6 +272,7 @@ struct OutlinedFunction { } OutlinedFunction() = delete; + virtual ~OutlinedFunction() = default; }; } // namespace outliner } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 882cadea223695..a833a541e4e025 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2088,14 +2088,22 @@ class TargetInstrInfo : public MCInstrInfo { /// Returns a \p outliner::OutlinedFunction struct containing target-specific /// information for a set of outlining candidates. 
Returns std::nullopt if the - /// candidates are not suitable for outlining. + /// candidates are not suitable for outlining. \p MinRep is the minimum + /// number of times the instruction sequence must be repeated. virtual std::optional getOutliningCandidateInfo( const MachineModuleInfo &MMI, - std::vector &RepeatedSequenceLocs) const { + std::vector &RepeatedSequenceLocs, + unsigned MipRep) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!"); } + virtual std::optional getOutliningCandidateInfo( + const MachineModuleInfo &MMI, + std::vector &RepeatedSequenceLocs) const { +return getOutliningCandidateInfo(MMI, RepeatedSequenceLocs, /*MipRep=*/2); + } + /// Optional target hook to create the LLVM IR attributes for the outlined /// function. If overridden, the overriding function must call the default /// implementation. diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index 4b56a467b8d076..eecf27613a2c31 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -456,8 +456,9 @@ struct MachineOutliner : public ModulePass { /// \param Mapper Contains outlining mapping information. /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions /// each type of candidate. - void findCandidates(InstructionMapper &Mapper, - std::vector &FunctionList); + void + findCandidates(InstructionMapper &Mapper, + std::vector> &FunctionList); /// Replace the sequences of instructions represented by \p OutlinedFunctions /// with calls to functions. @@ -465,7 +466,9 @@ struct MachineOutliner : public ModulePass { /// \param M The module we are outlining from. /// \param FunctionList A list of functions to be inserted into the module. /// \param Mapper Contains the instruction mappings for the module. - bool outline(Module &M, std::vector &FunctionList, + /// \param[out] OutlinedFunctionNum The outlined function number. 
+ bool outline(Module &M, + std::vector> &FunctionList, InstructionMapper &Mapper, unsigned &OutlinedFunctionNum); /// Creates a function for \p OF and inserts it into the module. @@ -583,7 +586,8 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) { } void MachineOutliner::findCandidates( -InstructionMapper &Mapper, std::vector &FunctionList) { +InstructionMapper &Mapper, +std::vector> &FunctionList) { FunctionList.clear(); SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants); @@ -684,7 +688,7 @@ void MachineOutliner::findCandidates( continue; } -FunctionList.push_back(*OF); +FunctionList.push_back(std::make_unique(*OF)); } } @@ -827,10 +831,9 @@ MachineFunction *Machin
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90304 >From 966922b9921669d48eb750c36ce3c9b792ba8161 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 12:58:54 -0700 Subject: [PATCH 1/3] [CGData] Clang Optinos --- clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/CommonArgs.cpp | 27 + clang/lib/Driver/ToolChains/Darwin.cpp | 46 ++ clang/test/Driver/codegen-data.c | 42 4 files changed, 127 insertions(+) create mode 100644 clang/test/Driver/codegen-data.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f78032255f036f..b400af5d99c654 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1894,6 +1894,18 @@ def fprofile_selected_function_group : Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, MarshallingInfoInt>; +def fcodegen_data_generate : Joined<["-"], "fcodegen-data-generate">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Emit codegen data into object file. LLD for MachO (for now) merges them into default.cgdata">; +def fcodegen_data_generate_EQ : Joined<["-"], "fcodegen-data-generate=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Emit codegen data into object file. 
LLD for MachO (for now) merges them into /default.cgdata">; +def fcodegen_data_use : Joined<["-"], "fcodegen-data-use">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Use codegen data read from default.cgdata to optimize the binary">; +def fcodegen_data_use_EQ : Joined<["-"], "fcodegen-data-use=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Use codegen data read from /default.cgdata to optimize the binary">; def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption]>, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2ce6779f4b43e3..5fa502d64c0300 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2753,6 +2753,33 @@ void tools::addMachineOutlinerArgs(const Driver &D, addArg(Twine("-enable-machine-outliner=never")); } } + + auto *CodeGenDataGenArg = + Args.getLastArg(options::OPT_fcodegen_data_generate, + options::OPT_fcodegen_data_generate_EQ); + auto *CodeGenDataUseArg = Args.getLastArg(options::OPT_fcodegen_data_use, +options::OPT_fcodegen_data_use_EQ); + + // We only allow one of them to be specified. + if (CodeGenDataGenArg && CodeGenDataUseArg) +D.Diag(diag::err_drv_argument_not_allowed_with) +<< CodeGenDataGenArg->getAsString(Args) +<< CodeGenDataUseArg->getAsString(Args); + + // For codegen data gen, the output file is passed to the linker + // while a boolean flag is passed to the LLVM backend. + if (CodeGenDataGenArg) +addArg(Twine("-codegen-data-generate")); + + // For codegen data use, the input file is passed to the LLVM backend. + if (CodeGenDataUseArg) { +SmallString<128> Path(CodeGenDataUseArg->getNumValues() == 0 + ? 
"" + : CodeGenDataUseArg->getValue()); +if (Path.empty() || llvm::sys::fs::is_directory(Path)) + llvm::sys::path::append(Path, "default.cgdata"); +addArg(Twine("-codegen-data-use-path=" + Path.str())); + } } void tools::addOpenMPDeviceRTL(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 5e7f9290e2009d..9e72e280109640 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -476,6 +476,19 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back(Args.MakeArgString(Twine("--cs-profile-path=") + Path)); } + +auto *CodeGenDataGenArg = +Args.getLastArg(options::OPT_fcodegen_data_generate, +options::OPT_fcodegen_data_generate_EQ); +if (CodeGenDataGenArg) { + SmallString<128> Path(CodeGenDataGenArg->getNumValues() == 0 +? "" +: CodeGenDataGenArg->getValue()); + if (Path.empty() || llvm::sys::fs::is_directory(Path)) +llvm::sys::path::append(Path, "default.cgdata"); + CmdArgs.push_back( + Args.MakeArgString(Twine("--codegen-data-generate-path=") + Path)); +} } } @@ -633,6 +646,39 @@ void darwin::Linker::ConstructJob(Compila
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90304 >From 966922b9921669d48eb750c36ce3c9b792ba8161 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 12:58:54 -0700 Subject: [PATCH 1/2] [CGData] Clang Optinos --- clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/CommonArgs.cpp | 27 + clang/lib/Driver/ToolChains/Darwin.cpp | 46 ++ clang/test/Driver/codegen-data.c | 42 4 files changed, 127 insertions(+) create mode 100644 clang/test/Driver/codegen-data.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f78032255f036f..b400af5d99c654 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1894,6 +1894,18 @@ def fprofile_selected_function_group : Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, MarshallingInfoInt>; +def fcodegen_data_generate : Joined<["-"], "fcodegen-data-generate">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Emit codegen data into object file. LLD for MachO (for now) merges them into default.cgdata">; +def fcodegen_data_generate_EQ : Joined<["-"], "fcodegen-data-generate=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Emit codegen data into object file. 
LLD for MachO (for now) merges them into /default.cgdata">; +def fcodegen_data_use : Joined<["-"], "fcodegen-data-use">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Use codegen data read from default.cgdata to optimize the binary">; +def fcodegen_data_use_EQ : Joined<["-"], "fcodegen-data-use=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Use codegen data read from /default.cgdata to optimize the binary">; def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption]>, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2ce6779f4b43e3..5fa502d64c0300 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2753,6 +2753,33 @@ void tools::addMachineOutlinerArgs(const Driver &D, addArg(Twine("-enable-machine-outliner=never")); } } + + auto *CodeGenDataGenArg = + Args.getLastArg(options::OPT_fcodegen_data_generate, + options::OPT_fcodegen_data_generate_EQ); + auto *CodeGenDataUseArg = Args.getLastArg(options::OPT_fcodegen_data_use, +options::OPT_fcodegen_data_use_EQ); + + // We only allow one of them to be specified. + if (CodeGenDataGenArg && CodeGenDataUseArg) +D.Diag(diag::err_drv_argument_not_allowed_with) +<< CodeGenDataGenArg->getAsString(Args) +<< CodeGenDataUseArg->getAsString(Args); + + // For codegen data gen, the output file is passed to the linker + // while a boolean flag is passed to the LLVM backend. + if (CodeGenDataGenArg) +addArg(Twine("-codegen-data-generate")); + + // For codegen data use, the input file is passed to the LLVM backend. + if (CodeGenDataUseArg) { +SmallString<128> Path(CodeGenDataUseArg->getNumValues() == 0 + ? 
"" + : CodeGenDataUseArg->getValue()); +if (Path.empty() || llvm::sys::fs::is_directory(Path)) + llvm::sys::path::append(Path, "default.cgdata"); +addArg(Twine("-codegen-data-use-path=" + Path.str())); + } } void tools::addOpenMPDeviceRTL(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 5e7f9290e2009d..9e72e280109640 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -476,6 +476,19 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back(Args.MakeArgString(Twine("--cs-profile-path=") + Path)); } + +auto *CodeGenDataGenArg = +Args.getLastArg(options::OPT_fcodegen_data_generate, +options::OPT_fcodegen_data_generate_EQ); +if (CodeGenDataGenArg) { + SmallString<128> Path(CodeGenDataGenArg->getNumValues() == 0 +? "" +: CodeGenDataGenArg->getValue()); + if (Path.empty() || llvm::sys::fs::is_directory(Path)) +llvm::sys::path::append(Path, "default.cgdata"); + CmdArgs.push_back( + Args.MakeArgString(Twine("--codegen-data-generate-path=") + Path)); +} } } @@ -633,6 +646,39 @@ void darwin::Linker::ConstructJob(Compila
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90304 >From 966922b9921669d48eb750c36ce3c9b792ba8161 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 12:58:54 -0700 Subject: [PATCH 1/3] [CGData] Clang Optinos --- clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/CommonArgs.cpp | 27 + clang/lib/Driver/ToolChains/Darwin.cpp | 46 ++ clang/test/Driver/codegen-data.c | 42 4 files changed, 127 insertions(+) create mode 100644 clang/test/Driver/codegen-data.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f78032255f036f..b400af5d99c654 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1894,6 +1894,18 @@ def fprofile_selected_function_group : Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, MarshallingInfoInt>; +def fcodegen_data_generate : Joined<["-"], "fcodegen-data-generate">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Emit codegen data into object file. LLD for MachO (for now) merges them into default.cgdata">; +def fcodegen_data_generate_EQ : Joined<["-"], "fcodegen-data-generate=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Emit codegen data into object file. 
LLD for MachO (for now) merges them into /default.cgdata">; +def fcodegen_data_use : Joined<["-"], "fcodegen-data-use">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Use codegen data read from default.cgdata to optimize the binary">; +def fcodegen_data_use_EQ : Joined<["-"], "fcodegen-data-use=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Use codegen data read from /default.cgdata to optimize the binary">; def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption]>, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2ce6779f4b43e3..5fa502d64c0300 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2753,6 +2753,33 @@ void tools::addMachineOutlinerArgs(const Driver &D, addArg(Twine("-enable-machine-outliner=never")); } } + + auto *CodeGenDataGenArg = + Args.getLastArg(options::OPT_fcodegen_data_generate, + options::OPT_fcodegen_data_generate_EQ); + auto *CodeGenDataUseArg = Args.getLastArg(options::OPT_fcodegen_data_use, +options::OPT_fcodegen_data_use_EQ); + + // We only allow one of them to be specified. + if (CodeGenDataGenArg && CodeGenDataUseArg) +D.Diag(diag::err_drv_argument_not_allowed_with) +<< CodeGenDataGenArg->getAsString(Args) +<< CodeGenDataUseArg->getAsString(Args); + + // For codegen data gen, the output file is passed to the linker + // while a boolean flag is passed to the LLVM backend. + if (CodeGenDataGenArg) +addArg(Twine("-codegen-data-generate")); + + // For codegen data use, the input file is passed to the LLVM backend. + if (CodeGenDataUseArg) { +SmallString<128> Path(CodeGenDataUseArg->getNumValues() == 0 + ? 
"" + : CodeGenDataUseArg->getValue()); +if (Path.empty() || llvm::sys::fs::is_directory(Path)) + llvm::sys::path::append(Path, "default.cgdata"); +addArg(Twine("-codegen-data-use-path=" + Path.str())); + } } void tools::addOpenMPDeviceRTL(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 5e7f9290e2009d..9e72e280109640 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -476,6 +476,19 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back(Args.MakeArgString(Twine("--cs-profile-path=") + Path)); } + +auto *CodeGenDataGenArg = +Args.getLastArg(options::OPT_fcodegen_data_generate, +options::OPT_fcodegen_data_generate_EQ); +if (CodeGenDataGenArg) { + SmallString<128> Path(CodeGenDataGenArg->getNumValues() == 0 +? "" +: CodeGenDataGenArg->getValue()); + if (Path.empty() || llvm::sys::fs::is_directory(Path)) +llvm::sys::path::append(Path, "default.cgdata"); + CmdArgs.push_back( + Args.MakeArgString(Twine("--codegen-data-generate-path=") + Path)); +} } } @@ -633,6 +646,39 @@ void darwin::Linker::ConstructJob(Compila
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90304 >From 966922b9921669d48eb750c36ce3c9b792ba8161 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 12:58:54 -0700 Subject: [PATCH 1/4] [CGData] Clang Optinos --- clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/CommonArgs.cpp | 27 + clang/lib/Driver/ToolChains/Darwin.cpp | 46 ++ clang/test/Driver/codegen-data.c | 42 4 files changed, 127 insertions(+) create mode 100644 clang/test/Driver/codegen-data.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f78032255f036f..b400af5d99c654 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1894,6 +1894,18 @@ def fprofile_selected_function_group : Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, MarshallingInfoInt>; +def fcodegen_data_generate : Joined<["-"], "fcodegen-data-generate">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Emit codegen data into object file. LLD for MachO (for now) merges them into default.cgdata">; +def fcodegen_data_generate_EQ : Joined<["-"], "fcodegen-data-generate=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Emit codegen data into object file. 
LLD for MachO (for now) merges them into /default.cgdata">; +def fcodegen_data_use : Joined<["-"], "fcodegen-data-use">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Use codegen data read from default.cgdata to optimize the binary">; +def fcodegen_data_use_EQ : Joined<["-"], "fcodegen-data-use=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Use codegen data read from /default.cgdata to optimize the binary">; def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption]>, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2ce6779f4b43e3..5fa502d64c0300 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2753,6 +2753,33 @@ void tools::addMachineOutlinerArgs(const Driver &D, addArg(Twine("-enable-machine-outliner=never")); } } + + auto *CodeGenDataGenArg = + Args.getLastArg(options::OPT_fcodegen_data_generate, + options::OPT_fcodegen_data_generate_EQ); + auto *CodeGenDataUseArg = Args.getLastArg(options::OPT_fcodegen_data_use, +options::OPT_fcodegen_data_use_EQ); + + // We only allow one of them to be specified. + if (CodeGenDataGenArg && CodeGenDataUseArg) +D.Diag(diag::err_drv_argument_not_allowed_with) +<< CodeGenDataGenArg->getAsString(Args) +<< CodeGenDataUseArg->getAsString(Args); + + // For codegen data gen, the output file is passed to the linker + // while a boolean flag is passed to the LLVM backend. + if (CodeGenDataGenArg) +addArg(Twine("-codegen-data-generate")); + + // For codegen data use, the input file is passed to the LLVM backend. + if (CodeGenDataUseArg) { +SmallString<128> Path(CodeGenDataUseArg->getNumValues() == 0 + ? 
"" + : CodeGenDataUseArg->getValue()); +if (Path.empty() || llvm::sys::fs::is_directory(Path)) + llvm::sys::path::append(Path, "default.cgdata"); +addArg(Twine("-codegen-data-use-path=" + Path.str())); + } } void tools::addOpenMPDeviceRTL(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 5e7f9290e2009d..9e72e280109640 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -476,6 +476,19 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back(Args.MakeArgString(Twine("--cs-profile-path=") + Path)); } + +auto *CodeGenDataGenArg = +Args.getLastArg(options::OPT_fcodegen_data_generate, +options::OPT_fcodegen_data_generate_EQ); +if (CodeGenDataGenArg) { + SmallString<128> Path(CodeGenDataGenArg->getNumValues() == 0 +? "" +: CodeGenDataGenArg->getValue()); + if (Path.empty() || llvm::sys::fs::is_directory(Path)) +llvm::sys::path::append(Path, "default.cgdata"); + CmdArgs.push_back( + Args.MakeArgString(Twine("--codegen-data-generate-path=") + Path)); +} } } @@ -633,6 +646,39 @@ void darwin::Linker::ConstructJob(Compila
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90304 >From 966922b9921669d48eb750c36ce3c9b792ba8161 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 12:58:54 -0700 Subject: [PATCH 1/4] [CGData] Clang Optinos --- clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/CommonArgs.cpp | 27 + clang/lib/Driver/ToolChains/Darwin.cpp | 46 ++ clang/test/Driver/codegen-data.c | 42 4 files changed, 127 insertions(+) create mode 100644 clang/test/Driver/codegen-data.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f78032255f036f..b400af5d99c654 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1894,6 +1894,18 @@ def fprofile_selected_function_group : Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, MarshallingInfoInt>; +def fcodegen_data_generate : Joined<["-"], "fcodegen-data-generate">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Emit codegen data into object file. LLD for MachO (for now) merges them into default.cgdata">; +def fcodegen_data_generate_EQ : Joined<["-"], "fcodegen-data-generate=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Emit codegen data into object file. 
LLD for MachO (for now) merges them into /default.cgdata">; +def fcodegen_data_use : Joined<["-"], "fcodegen-data-use">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Use codegen data read from default.cgdata to optimize the binary">; +def fcodegen_data_use_EQ : Joined<["-"], "fcodegen-data-use=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Use codegen data read from /default.cgdata to optimize the binary">; def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption]>, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2ce6779f4b43e3..5fa502d64c0300 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2753,6 +2753,33 @@ void tools::addMachineOutlinerArgs(const Driver &D, addArg(Twine("-enable-machine-outliner=never")); } } + + auto *CodeGenDataGenArg = + Args.getLastArg(options::OPT_fcodegen_data_generate, + options::OPT_fcodegen_data_generate_EQ); + auto *CodeGenDataUseArg = Args.getLastArg(options::OPT_fcodegen_data_use, +options::OPT_fcodegen_data_use_EQ); + + // We only allow one of them to be specified. + if (CodeGenDataGenArg && CodeGenDataUseArg) +D.Diag(diag::err_drv_argument_not_allowed_with) +<< CodeGenDataGenArg->getAsString(Args) +<< CodeGenDataUseArg->getAsString(Args); + + // For codegen data gen, the output file is passed to the linker + // while a boolean flag is passed to the LLVM backend. + if (CodeGenDataGenArg) +addArg(Twine("-codegen-data-generate")); + + // For codegen data use, the input file is passed to the LLVM backend. + if (CodeGenDataUseArg) { +SmallString<128> Path(CodeGenDataUseArg->getNumValues() == 0 + ? 
"" + : CodeGenDataUseArg->getValue()); +if (Path.empty() || llvm::sys::fs::is_directory(Path)) + llvm::sys::path::append(Path, "default.cgdata"); +addArg(Twine("-codegen-data-use-path=" + Path.str())); + } } void tools::addOpenMPDeviceRTL(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 5e7f9290e2009d..9e72e280109640 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -476,6 +476,19 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back(Args.MakeArgString(Twine("--cs-profile-path=") + Path)); } + +auto *CodeGenDataGenArg = +Args.getLastArg(options::OPT_fcodegen_data_generate, +options::OPT_fcodegen_data_generate_EQ); +if (CodeGenDataGenArg) { + SmallString<128> Path(CodeGenDataGenArg->getNumValues() == 0 +? "" +: CodeGenDataGenArg->getValue()); + if (Path.empty() || llvm::sys::fs::is_directory(Path)) +llvm::sys::path::append(Path, "default.cgdata"); + CmdArgs.push_back( + Args.MakeArgString(Twine("--codegen-data-generate-path=") + Path)); +} } } @@ -633,6 +646,39 @@ void darwin::Linker::ConstructJob(Compila
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90304 >From 966922b9921669d48eb750c36ce3c9b792ba8161 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 12:58:54 -0700 Subject: [PATCH 1/4] [CGData] Clang Optinos --- clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/CommonArgs.cpp | 27 + clang/lib/Driver/ToolChains/Darwin.cpp | 46 ++ clang/test/Driver/codegen-data.c | 42 4 files changed, 127 insertions(+) create mode 100644 clang/test/Driver/codegen-data.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f78032255f036f..b400af5d99c654 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1894,6 +1894,18 @@ def fprofile_selected_function_group : Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, MarshallingInfoInt>; +def fcodegen_data_generate : Joined<["-"], "fcodegen-data-generate">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Emit codegen data into object file. LLD for MachO (for now) merges them into default.cgdata">; +def fcodegen_data_generate_EQ : Joined<["-"], "fcodegen-data-generate=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Emit codegen data into object file. 
LLD for MachO (for now) merges them into /default.cgdata">; +def fcodegen_data_use : Joined<["-"], "fcodegen-data-use">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Use codegen data read from default.cgdata to optimize the binary">; +def fcodegen_data_use_EQ : Joined<["-"], "fcodegen-data-use=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Use codegen data read from /default.cgdata to optimize the binary">; def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption]>, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2ce6779f4b43e3..5fa502d64c0300 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2753,6 +2753,33 @@ void tools::addMachineOutlinerArgs(const Driver &D, addArg(Twine("-enable-machine-outliner=never")); } } + + auto *CodeGenDataGenArg = + Args.getLastArg(options::OPT_fcodegen_data_generate, + options::OPT_fcodegen_data_generate_EQ); + auto *CodeGenDataUseArg = Args.getLastArg(options::OPT_fcodegen_data_use, +options::OPT_fcodegen_data_use_EQ); + + // We only allow one of them to be specified. + if (CodeGenDataGenArg && CodeGenDataUseArg) +D.Diag(diag::err_drv_argument_not_allowed_with) +<< CodeGenDataGenArg->getAsString(Args) +<< CodeGenDataUseArg->getAsString(Args); + + // For codegen data gen, the output file is passed to the linker + // while a boolean flag is passed to the LLVM backend. + if (CodeGenDataGenArg) +addArg(Twine("-codegen-data-generate")); + + // For codegen data use, the input file is passed to the LLVM backend. + if (CodeGenDataUseArg) { +SmallString<128> Path(CodeGenDataUseArg->getNumValues() == 0 + ? 
"" + : CodeGenDataUseArg->getValue()); +if (Path.empty() || llvm::sys::fs::is_directory(Path)) + llvm::sys::path::append(Path, "default.cgdata"); +addArg(Twine("-codegen-data-use-path=" + Path.str())); + } } void tools::addOpenMPDeviceRTL(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 5e7f9290e2009d..9e72e280109640 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -476,6 +476,19 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back(Args.MakeArgString(Twine("--cs-profile-path=") + Path)); } + +auto *CodeGenDataGenArg = +Args.getLastArg(options::OPT_fcodegen_data_generate, +options::OPT_fcodegen_data_generate_EQ); +if (CodeGenDataGenArg) { + SmallString<128> Path(CodeGenDataGenArg->getNumValues() == 0 +? "" +: CodeGenDataGenArg->getValue()); + if (Path.empty() || llvm::sys::fs::is_directory(Path)) +llvm::sys::path::append(Path, "default.cgdata"); + CmdArgs.push_back( + Args.MakeArgString(Twine("--codegen-data-generate-path=") + Path)); +} } } @@ -633,6 +646,39 @@ void darwin::Linker::ConstructJob(Compila
[clang] Clang2 (PR #105453)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/105453 >From 966922b9921669d48eb750c36ce3c9b792ba8161 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 12:58:54 -0700 Subject: [PATCH 1/5] [CGData] Clang Optinos --- clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/CommonArgs.cpp | 27 + clang/lib/Driver/ToolChains/Darwin.cpp | 46 ++ clang/test/Driver/codegen-data.c | 42 4 files changed, 127 insertions(+) create mode 100644 clang/test/Driver/codegen-data.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f78032255f036f..b400af5d99c654 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1894,6 +1894,18 @@ def fprofile_selected_function_group : Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, MarshallingInfoInt>; +def fcodegen_data_generate : Joined<["-"], "fcodegen-data-generate">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Emit codegen data into object file. LLD for MachO (for now) merges them into default.cgdata">; +def fcodegen_data_generate_EQ : Joined<["-"], "fcodegen-data-generate=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Emit codegen data into object file. 
LLD for MachO (for now) merges them into /default.cgdata">; +def fcodegen_data_use : Joined<["-"], "fcodegen-data-use">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Use codegen data read from default.cgdata to optimize the binary">; +def fcodegen_data_use_EQ : Joined<["-"], "fcodegen-data-use=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Use codegen data read from /default.cgdata to optimize the binary">; def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption]>, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2ce6779f4b43e3..5fa502d64c0300 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2753,6 +2753,33 @@ void tools::addMachineOutlinerArgs(const Driver &D, addArg(Twine("-enable-machine-outliner=never")); } } + + auto *CodeGenDataGenArg = + Args.getLastArg(options::OPT_fcodegen_data_generate, + options::OPT_fcodegen_data_generate_EQ); + auto *CodeGenDataUseArg = Args.getLastArg(options::OPT_fcodegen_data_use, +options::OPT_fcodegen_data_use_EQ); + + // We only allow one of them to be specified. + if (CodeGenDataGenArg && CodeGenDataUseArg) +D.Diag(diag::err_drv_argument_not_allowed_with) +<< CodeGenDataGenArg->getAsString(Args) +<< CodeGenDataUseArg->getAsString(Args); + + // For codegen data gen, the output file is passed to the linker + // while a boolean flag is passed to the LLVM backend. + if (CodeGenDataGenArg) +addArg(Twine("-codegen-data-generate")); + + // For codegen data use, the input file is passed to the LLVM backend. + if (CodeGenDataUseArg) { +SmallString<128> Path(CodeGenDataUseArg->getNumValues() == 0 + ? 
"" + : CodeGenDataUseArg->getValue()); +if (Path.empty() || llvm::sys::fs::is_directory(Path)) + llvm::sys::path::append(Path, "default.cgdata"); +addArg(Twine("-codegen-data-use-path=" + Path.str())); + } } void tools::addOpenMPDeviceRTL(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 5e7f9290e2009d..9e72e280109640 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -476,6 +476,19 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back(Args.MakeArgString(Twine("--cs-profile-path=") + Path)); } + +auto *CodeGenDataGenArg = +Args.getLastArg(options::OPT_fcodegen_data_generate, +options::OPT_fcodegen_data_generate_EQ); +if (CodeGenDataGenArg) { + SmallString<128> Path(CodeGenDataGenArg->getNumValues() == 0 +? "" +: CodeGenDataGenArg->getValue()); + if (Path.empty() || llvm::sys::fs::is_directory(Path)) +llvm::sys::path::append(Path, "default.cgdata"); + CmdArgs.push_back( + Args.MakeArgString(Twine("--codegen-data-generate-path=") + Path)); +} } } @@ -633,6 +646,39 @@ void darwin::Linker::ConstructJob(Compil
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90304 >From 966922b9921669d48eb750c36ce3c9b792ba8161 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 12:58:54 -0700 Subject: [PATCH 1/6] [CGData] Clang Optinos --- clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/CommonArgs.cpp | 27 + clang/lib/Driver/ToolChains/Darwin.cpp | 46 ++ clang/test/Driver/codegen-data.c | 42 4 files changed, 127 insertions(+) create mode 100644 clang/test/Driver/codegen-data.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f78032255f036f..b400af5d99c654 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1894,6 +1894,18 @@ def fprofile_selected_function_group : Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, MarshallingInfoInt>; +def fcodegen_data_generate : Joined<["-"], "fcodegen-data-generate">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Emit codegen data into object file. LLD for MachO (for now) merges them into default.cgdata">; +def fcodegen_data_generate_EQ : Joined<["-"], "fcodegen-data-generate=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Emit codegen data into object file. 
LLD for MachO (for now) merges them into /default.cgdata">; +def fcodegen_data_use : Joined<["-"], "fcodegen-data-use">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Use codegen data read from default.cgdata to optimize the binary">; +def fcodegen_data_use_EQ : Joined<["-"], "fcodegen-data-use=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Use codegen data read from /default.cgdata to optimize the binary">; def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption]>, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2ce6779f4b43e3..5fa502d64c0300 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2753,6 +2753,33 @@ void tools::addMachineOutlinerArgs(const Driver &D, addArg(Twine("-enable-machine-outliner=never")); } } + + auto *CodeGenDataGenArg = + Args.getLastArg(options::OPT_fcodegen_data_generate, + options::OPT_fcodegen_data_generate_EQ); + auto *CodeGenDataUseArg = Args.getLastArg(options::OPT_fcodegen_data_use, +options::OPT_fcodegen_data_use_EQ); + + // We only allow one of them to be specified. + if (CodeGenDataGenArg && CodeGenDataUseArg) +D.Diag(diag::err_drv_argument_not_allowed_with) +<< CodeGenDataGenArg->getAsString(Args) +<< CodeGenDataUseArg->getAsString(Args); + + // For codegen data gen, the output file is passed to the linker + // while a boolean flag is passed to the LLVM backend. + if (CodeGenDataGenArg) +addArg(Twine("-codegen-data-generate")); + + // For codegen data use, the input file is passed to the LLVM backend. + if (CodeGenDataUseArg) { +SmallString<128> Path(CodeGenDataUseArg->getNumValues() == 0 + ? 
"" + : CodeGenDataUseArg->getValue()); +if (Path.empty() || llvm::sys::fs::is_directory(Path)) + llvm::sys::path::append(Path, "default.cgdata"); +addArg(Twine("-codegen-data-use-path=" + Path.str())); + } } void tools::addOpenMPDeviceRTL(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 5e7f9290e2009d..9e72e280109640 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -476,6 +476,19 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back(Args.MakeArgString(Twine("--cs-profile-path=") + Path)); } + +auto *CodeGenDataGenArg = +Args.getLastArg(options::OPT_fcodegen_data_generate, +options::OPT_fcodegen_data_generate_EQ); +if (CodeGenDataGenArg) { + SmallString<128> Path(CodeGenDataGenArg->getNumValues() == 0 +? "" +: CodeGenDataGenArg->getValue()); + if (Path.empty() || llvm::sys::fs::is_directory(Path)) +llvm::sys::path::append(Path, "default.cgdata"); + CmdArgs.push_back( + Args.MakeArgString(Twine("--codegen-data-generate-path=") + Path)); +} } } @@ -633,6 +646,39 @@ void darwin::Linker::ConstructJob(Compila
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90304 >From 966922b9921669d48eb750c36ce3c9b792ba8161 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 12:58:54 -0700 Subject: [PATCH 1/6] [CGData] Clang Optinos --- clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/CommonArgs.cpp | 27 + clang/lib/Driver/ToolChains/Darwin.cpp | 46 ++ clang/test/Driver/codegen-data.c | 42 4 files changed, 127 insertions(+) create mode 100644 clang/test/Driver/codegen-data.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f78032255f036f..b400af5d99c654 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1894,6 +1894,18 @@ def fprofile_selected_function_group : Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, MarshallingInfoInt>; +def fcodegen_data_generate : Joined<["-"], "fcodegen-data-generate">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Emit codegen data into object file. LLD for MachO (for now) merges them into default.cgdata">; +def fcodegen_data_generate_EQ : Joined<["-"], "fcodegen-data-generate=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Emit codegen data into object file. 
LLD for MachO (for now) merges them into /default.cgdata">; +def fcodegen_data_use : Joined<["-"], "fcodegen-data-use">, +Group, Visibility<[ClangOption, CC1Option]>, +HelpText<"Use codegen data read from default.cgdata to optimize the binary">; +def fcodegen_data_use_EQ : Joined<["-"], "fcodegen-data-use=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, +HelpText<"Use codegen data read from /default.cgdata to optimize the binary">; def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption, CLOption]>, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2ce6779f4b43e3..5fa502d64c0300 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2753,6 +2753,33 @@ void tools::addMachineOutlinerArgs(const Driver &D, addArg(Twine("-enable-machine-outliner=never")); } } + + auto *CodeGenDataGenArg = + Args.getLastArg(options::OPT_fcodegen_data_generate, + options::OPT_fcodegen_data_generate_EQ); + auto *CodeGenDataUseArg = Args.getLastArg(options::OPT_fcodegen_data_use, +options::OPT_fcodegen_data_use_EQ); + + // We only allow one of them to be specified. + if (CodeGenDataGenArg && CodeGenDataUseArg) +D.Diag(diag::err_drv_argument_not_allowed_with) +<< CodeGenDataGenArg->getAsString(Args) +<< CodeGenDataUseArg->getAsString(Args); + + // For codegen data gen, the output file is passed to the linker + // while a boolean flag is passed to the LLVM backend. + if (CodeGenDataGenArg) +addArg(Twine("-codegen-data-generate")); + + // For codegen data use, the input file is passed to the LLVM backend. + if (CodeGenDataUseArg) { +SmallString<128> Path(CodeGenDataUseArg->getNumValues() == 0 + ? 
"" + : CodeGenDataUseArg->getValue()); +if (Path.empty() || llvm::sys::fs::is_directory(Path)) + llvm::sys::path::append(Path, "default.cgdata"); +addArg(Twine("-codegen-data-use-path=" + Path.str())); + } } void tools::addOpenMPDeviceRTL(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 5e7f9290e2009d..9e72e280109640 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -476,6 +476,19 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, llvm::sys::path::append(Path, "default.profdata"); CmdArgs.push_back(Args.MakeArgString(Twine("--cs-profile-path=") + Path)); } + +auto *CodeGenDataGenArg = +Args.getLastArg(options::OPT_fcodegen_data_generate, +options::OPT_fcodegen_data_generate_EQ); +if (CodeGenDataGenArg) { + SmallString<128> Path(CodeGenDataGenArg->getNumValues() == 0 +? "" +: CodeGenDataGenArg->getValue()); + if (Path.empty() || llvm::sys::fs::is_directory(Path)) +llvm::sys::path::append(Path, "default.cgdata"); + CmdArgs.push_back( + Args.MakeArgString(Twine("--codegen-data-generate-path=") + Path)); +} } } @@ -633,6 +646,39 @@ void darwin::Linker::ConstructJob(Compila
[clang] [CGData] Clang Options (PR #90304)
kyulee-com wrote: > Can we add some documentation to > https://github.com/llvm/llvm-project/blob/main/clang/docs/UsersManual.rst? > This could also be a separate PR. > > LGTM, but I want to give others a chance to review. Added some documentation and simplified the flag usage by removing the directory case, as discussed in #90166. https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CGData] Clang Options (PR #90304)
@@ -1894,6 +1894,18 @@ def fprofile_selected_function_group : Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, HelpText<"Partition functions into N groups using -fprofile-function-groups and select only functions in group i to be instrumented. The valid range is 0 to N-1 inclusive">, MarshallingInfoInt>; +def fcodegen_data_generate_EQ : Joined<["-"], "fcodegen-data-generate=">, +Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, kyulee-com wrote: Yeah, it's not needed. Instead, I added CLOption, similar to the IRPGO flags. https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CGData] Clang Options (PR #90304)
@@ -2753,6 +2753,33 @@ void tools::addMachineOutlinerArgs(const Driver &D, addArg(Twine("-enable-machine-outliner=never")); } } + + auto *CodeGenDataGenArg = + Args.getLastArg(options::OPT_fcodegen_data_generate, kyulee-com wrote: Thanks for the suggestion! I now use only the `_EQ` flag for the various checks. https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [ThinLTO][NFC] Prep for two-codegen rounds (PR #108569)
https://github.com/kyulee-com created https://github.com/llvm/llvm-project/pull/108569 None >From dd7de1a900dceb411931d9d5a49b164096c314a2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 4 +- 4 files changed, 47 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 5d9a5cbd18f156..400e34527b6c87 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1474,7 
+1474,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1840,45 +1841,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + fo
[clang] [llvm] Thin3 (PR #108614)
https://github.com/kyulee-com created https://github.com/llvm/llvm-project/pull/108614 None >From dd7de1a900dceb411931d9d5a49b164096c314a2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/2] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 4 +- 4 files changed, 47 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 5d9a5cbd18f156..400e34527b6c87 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1474,7 
+1474,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1840,45 +1841,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); +
[clang] [CGData] Clang Options (PR #90304)
kyulee-com wrote: @NuriAmari Do you see or expect any issues with using distributed ThinLTO? https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Thin3 (PR #108614)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/108614 >From dd7de1a900dceb411931d9d5a49b164096c314a2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/2] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 4 +- 4 files changed, 47 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 5d9a5cbd18f156..400e34527b6c87 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1474,7 
+1474,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1840,45 +1841,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] Thin3 (PR #108614)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/108614 >From dd7de1a900dceb411931d9d5a49b164096c314a2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/2] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 4 +- 4 files changed, 47 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 5d9a5cbd18f156..400e34527b6c87 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1474,7 
+1474,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1840,45 +1841,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector<BitcodeModule *> ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] Thin3 (PR #108614)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/108614 >From 95709740e820c0efddb1fdb53436a03194a1b88e Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/2] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 5d9a5cbd18f156..400e34527b6c87 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1474,7 
+1474,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1840,45 +1841,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector<BitcodeModule *> ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] Thin3 (PR #108614)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/108614 >From 95709740e820c0efddb1fdb53436a03194a1b88e Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/2] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 5d9a5cbd18f156..400e34527b6c87 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1474,7 
+1474,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1840,45 +1841,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector<BitcodeModule *> ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [CGData] Clang Options (PR #90304)
kyulee-com wrote: I think I've addressed all the comments and the usage is fairly straightforward. Can someone take another look? https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90934 >From 95709740e820c0efddb1fdb53436a03194a1b88e Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 5d9a5cbd18f156..400e34527b6c87 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1474,7 +1474,8 
@@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1840,45 +1841,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector<BitcodeModule *> ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for (aut
[clang] [llvm] [CGData][ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90934 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90934 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90934 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90934 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90933 >From 95709740e820c0efddb1fdb53436a03194a1b88e Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/2] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 5d9a5cbd18f156..400e34527b6c87 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1474,7 
+1474,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1840,45 +1841,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector<BitcodeModule *> ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90933 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/2] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector<BitcodeModule *> ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] [CGData][ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90934 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 +1473,8 
@@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector<BitcodeModule *> ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for (aut
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CGData] Clang Options (PR #90304)
https://github.com/kyulee-com closed https://github.com/llvm/llvm-project/pull/90304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com ready_for_review https://github.com/llvm/llvm-project/pull/90934 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com ready_for_review https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90933 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/2] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] [CGData][ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90934 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/2] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90933 >From 411fc459e58a65d9599c917f220ba68bb799baac Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 13 Sep 2024 08:51:00 -0700 Subject: [PATCH 1/4] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds --- llvm/include/llvm/CGData/CodeGenData.h| 16 +++ llvm/lib/CGData/CodeGenData.cpp | 81 +- llvm/lib/LTO/CMakeLists.txt | 1 + llvm/lib/LTO/LTO.cpp | 103 +- llvm/lib/LTO/LTOBackend.cpp | 11 ++ .../test/ThinLTO/AArch64/cgdata-two-rounds.ll | 94 llvm/test/ThinLTO/AArch64/lit.local.cfg | 2 + 7 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll create mode 100644 llvm/test/ThinLTO/AArch64/lit.local.cfg diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 84133a433170fe..1e1afe99327650 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +/// Initialize the two-codegen rounds. +void initializeTwoCodegenRounds(); + +/// Save the current module before the first codegen round. +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task); + +/// Load the current module before the second codegen round. +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context); + +/// Merge the codegen data from the input files in scratch vector in ThinLTO +/// two-codegen rounds. 
+Error mergeCodeGenData( +const std::unique_ptr>> InputFiles); + void warn(Error E, StringRef Whence = ""); void warn(Twine Message, std::string Whence = "", std::string Hint = ""); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index 55d2504231c744..ff8e5dd7c75790 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/WithColor.h" #define DEBUG_TYPE "cg-data" @@ -30,6 +31,14 @@ cl::opt cl::opt CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, cl::desc("File path to where .cgdata file is read")); +cl::opt CodeGenDataThinLTOTwoRounds( +"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, +cl::desc("Enable two-round ThinLTO code generation. The first round " + "emits codegen data, while the second round uses the emitted " + "codegen data for further optimizations.")); + +// Path to where the optimized bitcodes are saved and restored for ThinLTO. +static SmallString<128> CodeGenDataThinLTOTwoRoundsPath; static std::string getCGDataErrString(cgdata_error Err, const std::string &ErrMsg = "") { @@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() { std::call_once(CodeGenData::OnceFlag, []() { Instance = std::unique_ptr(new CodeGenData()); -if (CodeGenDataGenerate) +if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) Instance->EmitCGData = true; else if (!CodeGenDataUsePath.empty()) { // Initialize the global CGData if the input file name is given. 
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) { } } +static std::string getPath(StringRef Dir, unsigned Task) { + return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str(); +} + +void initializeTwoCodegenRounds() { + assert(CodeGenDataThinLTOTwoRounds); + if (auto EC = llvm::sys::fs::createUniqueDirectory( + "cgdata", CodeGenDataThinLTOTwoRoundsPath)) +report_fatal_error(Twine("Failed to create directory: ") + EC.message()); +} + +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task); + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); + if (EC) +report_fatal_error(Twine("Failed to open ") + Path + + " to save optimized bitcode: " + EC.message()); + WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); +} + +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoun
[clang] [llvm] Thin3 (PR #108614)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/108614 >From 411fc459e58a65d9599c917f220ba68bb799baac Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 13 Sep 2024 08:51:00 -0700 Subject: [PATCH 1/4] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds --- llvm/include/llvm/CGData/CodeGenData.h| 16 +++ llvm/lib/CGData/CodeGenData.cpp | 81 +- llvm/lib/LTO/CMakeLists.txt | 1 + llvm/lib/LTO/LTO.cpp | 103 +- llvm/lib/LTO/LTOBackend.cpp | 11 ++ .../test/ThinLTO/AArch64/cgdata-two-rounds.ll | 94 llvm/test/ThinLTO/AArch64/lit.local.cfg | 2 + 7 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll create mode 100644 llvm/test/ThinLTO/AArch64/lit.local.cfg diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 84133a433170fe..1e1afe99327650 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +/// Initialize the two-codegen rounds. +void initializeTwoCodegenRounds(); + +/// Save the current module before the first codegen round. +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task); + +/// Load the current module before the second codegen round. +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context); + +/// Merge the codegen data from the input files in scratch vector in ThinLTO +/// two-codegen rounds. 
+Error mergeCodeGenData( +const std::unique_ptr>> InputFiles); + void warn(Error E, StringRef Whence = ""); void warn(Twine Message, std::string Whence = "", std::string Hint = ""); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index 55d2504231c744..ff8e5dd7c75790 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/WithColor.h" #define DEBUG_TYPE "cg-data" @@ -30,6 +31,14 @@ cl::opt cl::opt CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, cl::desc("File path to where .cgdata file is read")); +cl::opt CodeGenDataThinLTOTwoRounds( +"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, +cl::desc("Enable two-round ThinLTO code generation. The first round " + "emits codegen data, while the second round uses the emitted " + "codegen data for further optimizations.")); + +// Path to where the optimized bitcodes are saved and restored for ThinLTO. +static SmallString<128> CodeGenDataThinLTOTwoRoundsPath; static std::string getCGDataErrString(cgdata_error Err, const std::string &ErrMsg = "") { @@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() { std::call_once(CodeGenData::OnceFlag, []() { Instance = std::unique_ptr(new CodeGenData()); -if (CodeGenDataGenerate) +if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) Instance->EmitCGData = true; else if (!CodeGenDataUsePath.empty()) { // Initialize the global CGData if the input file name is given. 
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) { } } +static std::string getPath(StringRef Dir, unsigned Task) { + return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str(); +} + +void initializeTwoCodegenRounds() { + assert(CodeGenDataThinLTOTwoRounds); + if (auto EC = llvm::sys::fs::createUniqueDirectory( + "cgdata", CodeGenDataThinLTOTwoRoundsPath)) +report_fatal_error(Twine("Failed to create directory: ") + EC.message()); +} + +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task); + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); + if (EC) +report_fatal_error(Twine("Failed to open ") + Path + + " to save optimized bitcode: " + EC.message()); + WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); +} + +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRou
[clang] [llvm] Thin3 (PR #108614)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/108614 >From 411fc459e58a65d9599c917f220ba68bb799baac Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 13 Sep 2024 08:51:00 -0700 Subject: [PATCH 1/5] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds --- llvm/include/llvm/CGData/CodeGenData.h| 16 +++ llvm/lib/CGData/CodeGenData.cpp | 81 +- llvm/lib/LTO/CMakeLists.txt | 1 + llvm/lib/LTO/LTO.cpp | 103 +- llvm/lib/LTO/LTOBackend.cpp | 11 ++ .../test/ThinLTO/AArch64/cgdata-two-rounds.ll | 94 llvm/test/ThinLTO/AArch64/lit.local.cfg | 2 + 7 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll create mode 100644 llvm/test/ThinLTO/AArch64/lit.local.cfg diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 84133a433170fe..1e1afe99327650 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +/// Initialize the two-codegen rounds. +void initializeTwoCodegenRounds(); + +/// Save the current module before the first codegen round. +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task); + +/// Load the current module before the second codegen round. +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context); + +/// Merge the codegen data from the input files in scratch vector in ThinLTO +/// two-codegen rounds. 
+Error mergeCodeGenData( +const std::unique_ptr>> InputFiles); + void warn(Error E, StringRef Whence = ""); void warn(Twine Message, std::string Whence = "", std::string Hint = ""); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index 55d2504231c744..ff8e5dd7c75790 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/WithColor.h" #define DEBUG_TYPE "cg-data" @@ -30,6 +31,14 @@ cl::opt cl::opt CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, cl::desc("File path to where .cgdata file is read")); +cl::opt CodeGenDataThinLTOTwoRounds( +"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, +cl::desc("Enable two-round ThinLTO code generation. The first round " + "emits codegen data, while the second round uses the emitted " + "codegen data for further optimizations.")); + +// Path to where the optimized bitcodes are saved and restored for ThinLTO. +static SmallString<128> CodeGenDataThinLTOTwoRoundsPath; static std::string getCGDataErrString(cgdata_error Err, const std::string &ErrMsg = "") { @@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() { std::call_once(CodeGenData::OnceFlag, []() { Instance = std::unique_ptr(new CodeGenData()); -if (CodeGenDataGenerate) +if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) Instance->EmitCGData = true; else if (!CodeGenDataUsePath.empty()) { // Initialize the global CGData if the input file name is given. 
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) { } } +static std::string getPath(StringRef Dir, unsigned Task) { + return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str(); +} + +void initializeTwoCodegenRounds() { + assert(CodeGenDataThinLTOTwoRounds); + if (auto EC = llvm::sys::fs::createUniqueDirectory( + "cgdata", CodeGenDataThinLTOTwoRoundsPath)) +report_fatal_error(Twine("Failed to create directory: ") + EC.message()); +} + +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task); + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); + if (EC) +report_fatal_error(Twine("Failed to open ") + Path + + " to save optimized bitcode: " + EC.message()); + WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); +} + +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRou
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
kyulee-com wrote: @teresajohnson Thank you for your review and valuable feedback! https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com closed https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
@@ -1513,6 +1522,171 @@ class InProcessThinBackend : public ThinBackendProc { return Error::success(); } }; + +/// This backend is utilized in the first round of a two-codegen round process. +/// It first saves optimized bitcode files to disk before the codegen process +/// begins. After codegen, it stores the resulting object files in a scratch +/// buffer. Note the codegen data stored in the scratch buffer will be extracted +/// and merged in the subsequent step. +class FirstRoundThinBackend : public InProcessThinBackend { + AddStreamFn IRAddStream; + FileCache IRCache; + +public: + FirstRoundThinBackend( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, + ThreadPoolStrategy ThinLTOParallelism, + const DenseMap &ModuleToDefinedGVSummaries, + AddStreamFn CGAddStream, FileCache CGCache, AddStreamFn IRAddStream, + FileCache IRCache) + : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism, + ModuleToDefinedGVSummaries, std::move(CGAddStream), + std::move(CGCache), /*OnWrite=*/nullptr, + /*ShouldEmitIndexFiles=*/false, + /*ShouldEmitImportsFiles=*/false), +IRAddStream(std::move(IRAddStream)), IRCache(std::move(IRCache)) {} + + Error runThinLTOBackendThread( + AddStreamFn CGAddStream, FileCache CGCache, unsigned Task, + BitcodeModule BM, ModuleSummaryIndex &CombinedIndex, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map &ResolvedODR, + const GVSummaryMapTy &DefinedGlobals, + MapVector &ModuleMap) override { +auto RunThinBackend = [&](AddStreamFn CGAddStream, + AddStreamFn IRAddStream) { + LTOLLVMContext BackendContext(Conf); + Expected> MOrErr = BM.parseModule(BackendContext); + if (!MOrErr) +return MOrErr.takeError(); + + return thinBackend(Conf, Task, CGAddStream, **MOrErr, CombinedIndex, + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly, IRAddStream); +}; + +auto ModuleID = BM.getModuleIdentifier(); +// Like InProcessThinBackend, we produce index files as needed for +// 
FirstRoundThinBackend. However, these files are not generated for +// SecondRoundThinBackend. +if (ShouldEmitIndexFiles) { + if (auto E = emitFiles(ImportList, ModuleID, ModuleID.str())) +return E; +} + +assert((CGCache.isValid() == IRCache.isValid()) && + "Both caches for CG and IR should have matching availability"); +if (!CGCache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) || +all_of(CombinedIndex.getModuleHash(ModuleID), + [](uint32_t V) { return V == 0; })) + // Cache disabled or no entry for this module in the combined index or + // no module hash. + return RunThinBackend(CGAddStream, IRAddStream); + +// Get CGKey for caching object in CGCache. +std::string CGKey = computeLTOCacheKey( +Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR, +DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls); +Expected CacheCGAddStreamOrErr = +CGCache(Task, CGKey, ModuleID); +if (Error Err = CacheCGAddStreamOrErr.takeError()) + return Err; +AddStreamFn &CacheCGAddStream = *CacheCGAddStreamOrErr; + +// Get IRKey for caching (optimized) IR in IRCache with an extra ID. +std::string IRKey = computeLTOCacheKey( +Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR, +DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls, /*ExtraID=*/"IR"); +Expected CacheIRAddStreamOrErr = +IRCache(Task, IRKey, ModuleID); +if (Error Err = CacheIRAddStreamOrErr.takeError()) + return Err; +AddStreamFn &CacheIRAddStream = *CacheIRAddStreamOrErr; + +assert((CacheCGAddStream == nullptr) == (CacheIRAddStream == nullptr) && + "Both CG and IR caching should be matched"); +if (CacheIRAddStream) { + LLVM_DEBUG(dbgs() << "[FirstRound] Cache Miss for " +<< BM.getModuleIdentifier() << "\n"); + return RunThinBackend(CacheCGAddStream, CacheIRAddStream); +} + +return Error::success(); + } +}; + +/// This backend operates in the second round of a two-codegen round process. +/// It starts by reading the optimized bitcode files that were saved during the +/// first round. 
The backend then executes the codegen only to further optimize +/// the code, utilizing the codegen data merged from the first round. Finally, +/// it writes the resulting object files as usual. +class SecondRoundThinBackend : public InProcessThinBackend { + std::unique_ptr> IRFiles; + stable_hash CombinedCGDataHash; + +public: + SecondRoundThinBackend( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, +
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
@@ -74,6 +75,8 @@ static cl::opt<bool> ThinLTOAssumeMerged( cl::desc("Assume the input has already undergone ThinLTO function " "importing and the other pre-optimization pipeline changes.")); +extern cl::opt<bool> CodeGenDataThinLTOTwoRounds; kyulee-com wrote: Deleted. https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
@@ -1513,6 +1522,171 @@ class InProcessThinBackend : public ThinBackendProc { return Error::success(); } }; + +/// This backend is utilized in the first round of a two-codegen round process. +/// It first saves optimized bitcode files to disk before the codegen process +/// begins. After codegen, it stores the resulting object files in a scratch +/// buffer. Note the codegen data stored in the scratch buffer will be extracted +/// and merged in the subsequent step. +class FirstRoundThinBackend : public InProcessThinBackend { + AddStreamFn IRAddStream; + FileCache IRCache; + +public: + FirstRoundThinBackend( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, + ThreadPoolStrategy ThinLTOParallelism, + const DenseMap &ModuleToDefinedGVSummaries, + AddStreamFn CGAddStream, FileCache CGCache, AddStreamFn IRAddStream, + FileCache IRCache) + : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism, + ModuleToDefinedGVSummaries, std::move(CGAddStream), + std::move(CGCache), /*OnWrite=*/nullptr, + /*ShouldEmitIndexFiles=*/false, + /*ShouldEmitImportsFiles=*/false), +IRAddStream(std::move(IRAddStream)), IRCache(std::move(IRCache)) {} + + Error runThinLTOBackendThread( + AddStreamFn CGAddStream, FileCache CGCache, unsigned Task, + BitcodeModule BM, ModuleSummaryIndex &CombinedIndex, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map &ResolvedODR, + const GVSummaryMapTy &DefinedGlobals, + MapVector &ModuleMap) override { +auto RunThinBackend = [&](AddStreamFn CGAddStream, + AddStreamFn IRAddStream) { + LTOLLVMContext BackendContext(Conf); + Expected> MOrErr = BM.parseModule(BackendContext); + if (!MOrErr) +return MOrErr.takeError(); + + return thinBackend(Conf, Task, CGAddStream, **MOrErr, CombinedIndex, + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly, IRAddStream); +}; + +auto ModuleID = BM.getModuleIdentifier(); +// Like InProcessThinBackend, we produce index files as needed for +// 
FirstRoundThinBackend. However, these files are not generated for +// SecondRoundThinBackend. +if (ShouldEmitIndexFiles) { + if (auto E = emitFiles(ImportList, ModuleID, ModuleID.str())) +return E; +} + +assert((CGCache.isValid() == IRCache.isValid()) && + "Both caches for CG and IR should have matching availability"); +if (!CGCache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) || +all_of(CombinedIndex.getModuleHash(ModuleID), + [](uint32_t V) { return V == 0; })) + // Cache disabled or no entry for this module in the combined index or + // no module hash. + return RunThinBackend(CGAddStream, IRAddStream); + +// Get CGKey for caching object in CGCache. +std::string CGKey = computeLTOCacheKey( +Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR, +DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls); +Expected CacheCGAddStreamOrErr = +CGCache(Task, CGKey, ModuleID); +if (Error Err = CacheCGAddStreamOrErr.takeError()) + return Err; +AddStreamFn &CacheCGAddStream = *CacheCGAddStreamOrErr; + +// Get IRKey for caching (optimized) IR in IRCache with an extra ID. +std::string IRKey = computeLTOCacheKey( +Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR, +DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls, /*ExtraID=*/"IR"); +Expected CacheIRAddStreamOrErr = +IRCache(Task, IRKey, ModuleID); +if (Error Err = CacheIRAddStreamOrErr.takeError()) + return Err; +AddStreamFn &CacheIRAddStream = *CacheIRAddStreamOrErr; + +assert((CacheCGAddStream == nullptr) == (CacheIRAddStream == nullptr) && + "Both CG and IR caching should be matched"); +if (CacheIRAddStream) { + LLVM_DEBUG(dbgs() << "[FirstRound] Cache Miss for " +<< BM.getModuleIdentifier() << "\n"); + return RunThinBackend(CacheCGAddStream, CacheIRAddStream); +} + +return Error::success(); + } +}; + +/// This backend operates in the second round of a two-codegen round process. +/// It starts by reading the optimized bitcode files that were saved during the +/// first round. 
The backend then executes the codegen only to further optimize +/// the code, utilizing the codegen data merged from the first round. Finally, +/// it writes the resulting object files as usual. +class SecondRoundThinBackend : public InProcessThinBackend { + std::unique_ptr> IRFiles; + stable_hash CombinedCGDataHash; + +public: + SecondRoundThinBackend( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, +
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
@@ -0,0 +1,94 @@ +; This test verifies whether we can outline a singleton instance (i.e., an instance that does not repeat) kyulee-com wrote: Added more comments on the test. https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
@@ -1513,6 +1522,171 @@ class InProcessThinBackend : public ThinBackendProc { return Error::success(); } }; + +/// This backend is utilized in the first round of a two-codegen round process. +/// It first saves optimized bitcode files to disk before the codegen process +/// begins. After codegen, it stores the resulting object files in a scratch +/// buffer. Note the codegen data stored in the scratch buffer will be extracted +/// and merged in the subsequent step. +class FirstRoundThinBackend : public InProcessThinBackend { + AddStreamFn IRAddStream; + FileCache IRCache; + +public: + FirstRoundThinBackend( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, + ThreadPoolStrategy ThinLTOParallelism, + const DenseMap &ModuleToDefinedGVSummaries, + AddStreamFn CGAddStream, FileCache CGCache, AddStreamFn IRAddStream, + FileCache IRCache) + : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism, + ModuleToDefinedGVSummaries, std::move(CGAddStream), + std::move(CGCache), /*OnWrite=*/nullptr, + /*ShouldEmitIndexFiles=*/false, + /*ShouldEmitImportsFiles=*/false), +IRAddStream(std::move(IRAddStream)), IRCache(std::move(IRCache)) {} + + Error runThinLTOBackendThread( + AddStreamFn CGAddStream, FileCache CGCache, unsigned Task, + BitcodeModule BM, ModuleSummaryIndex &CombinedIndex, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map &ResolvedODR, + const GVSummaryMapTy &DefinedGlobals, + MapVector &ModuleMap) override { +auto RunThinBackend = [&](AddStreamFn CGAddStream, + AddStreamFn IRAddStream) { + LTOLLVMContext BackendContext(Conf); + Expected> MOrErr = BM.parseModule(BackendContext); + if (!MOrErr) +return MOrErr.takeError(); + + return thinBackend(Conf, Task, CGAddStream, **MOrErr, CombinedIndex, + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly, IRAddStream); +}; + +auto ModuleID = BM.getModuleIdentifier(); +// Like InProcessThinBackend, we produce index files as needed for +// 
FirstRoundThinBackend. However, these files are not generated for +// SecondRoundThinBackend. +if (ShouldEmitIndexFiles) { + if (auto E = emitFiles(ImportList, ModuleID, ModuleID.str())) +return E; +} + +assert((CGCache.isValid() == IRCache.isValid()) && + "Both caches for CG and IR should have matching availability"); +if (!CGCache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) || +all_of(CombinedIndex.getModuleHash(ModuleID), + [](uint32_t V) { return V == 0; })) + // Cache disabled or no entry for this module in the combined index or + // no module hash. + return RunThinBackend(CGAddStream, IRAddStream); + +// Get CGKey for caching object in CGCache. +std::string CGKey = computeLTOCacheKey( +Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR, +DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls); +Expected CacheCGAddStreamOrErr = +CGCache(Task, CGKey, ModuleID); +if (Error Err = CacheCGAddStreamOrErr.takeError()) + return Err; +AddStreamFn &CacheCGAddStream = *CacheCGAddStreamOrErr; + +// Get IRKey for caching (optimized) IR in IRCache with an extra ID. +std::string IRKey = computeLTOCacheKey( kyulee-com wrote: Defined `recomputeLTOCacheKey` to rehash the key with an additional string. https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90933 >From 411fc459e58a65d9599c917f220ba68bb799baac Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 13 Sep 2024 08:51:00 -0700 Subject: [PATCH 1/5] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds --- llvm/include/llvm/CGData/CodeGenData.h| 16 +++ llvm/lib/CGData/CodeGenData.cpp | 81 +- llvm/lib/LTO/CMakeLists.txt | 1 + llvm/lib/LTO/LTO.cpp | 103 +- llvm/lib/LTO/LTOBackend.cpp | 11 ++ .../test/ThinLTO/AArch64/cgdata-two-rounds.ll | 94 llvm/test/ThinLTO/AArch64/lit.local.cfg | 2 + 7 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll create mode 100644 llvm/test/ThinLTO/AArch64/lit.local.cfg diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 84133a433170fe..1e1afe99327650 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +/// Initialize the two-codegen rounds. +void initializeTwoCodegenRounds(); + +/// Save the current module before the first codegen round. +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task); + +/// Load the current module before the second codegen round. +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context); + +/// Merge the codegen data from the input files in scratch vector in ThinLTO +/// two-codegen rounds. 
+Error mergeCodeGenData( +const std::unique_ptr>> InputFiles); + void warn(Error E, StringRef Whence = ""); void warn(Twine Message, std::string Whence = "", std::string Hint = ""); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index 55d2504231c744..ff8e5dd7c75790 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/WithColor.h" #define DEBUG_TYPE "cg-data" @@ -30,6 +31,14 @@ cl::opt cl::opt CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, cl::desc("File path to where .cgdata file is read")); +cl::opt CodeGenDataThinLTOTwoRounds( +"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, +cl::desc("Enable two-round ThinLTO code generation. The first round " + "emits codegen data, while the second round uses the emitted " + "codegen data for further optimizations.")); + +// Path to where the optimized bitcodes are saved and restored for ThinLTO. +static SmallString<128> CodeGenDataThinLTOTwoRoundsPath; static std::string getCGDataErrString(cgdata_error Err, const std::string &ErrMsg = "") { @@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() { std::call_once(CodeGenData::OnceFlag, []() { Instance = std::unique_ptr(new CodeGenData()); -if (CodeGenDataGenerate) +if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) Instance->EmitCGData = true; else if (!CodeGenDataUsePath.empty()) { // Initialize the global CGData if the input file name is given. 
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) { } } +static std::string getPath(StringRef Dir, unsigned Task) { + return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str(); +} + +void initializeTwoCodegenRounds() { + assert(CodeGenDataThinLTOTwoRounds); + if (auto EC = llvm::sys::fs::createUniqueDirectory( + "cgdata", CodeGenDataThinLTOTwoRoundsPath)) +report_fatal_error(Twine("Failed to create directory: ") + EC.message()); +} + +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task); + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); + if (EC) +report_fatal_error(Twine("Failed to open ") + Path + + " to save optimized bitcode: " + EC.message()); + WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); +} + +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoun
[clang] [llvm] Thin3 (PR #108614)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/108614 >From 1cac61d42d7965a095aba3b47cca1f5e3c27fd82 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 13 Sep 2024 08:51:00 -0700 Subject: [PATCH 1/4] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds --- llvm/include/llvm/CGData/CodeGenData.h| 16 +++ llvm/lib/CGData/CodeGenData.cpp | 81 +- llvm/lib/LTO/CMakeLists.txt | 1 + llvm/lib/LTO/LTO.cpp | 103 +- llvm/lib/LTO/LTOBackend.cpp | 11 ++ .../test/ThinLTO/AArch64/cgdata-two-rounds.ll | 94 llvm/test/ThinLTO/AArch64/lit.local.cfg | 2 + 7 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll create mode 100644 llvm/test/ThinLTO/AArch64/lit.local.cfg diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 84133a433170fe..1e1afe99327650 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +/// Initialize the two-codegen rounds. +void initializeTwoCodegenRounds(); + +/// Save the current module before the first codegen round. +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task); + +/// Load the current module before the second codegen round. +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context); + +/// Merge the codegen data from the input files in scratch vector in ThinLTO +/// two-codegen rounds. 
+Error mergeCodeGenData( +const std::unique_ptr>> InputFiles); + void warn(Error E, StringRef Whence = ""); void warn(Twine Message, std::string Whence = "", std::string Hint = ""); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index 55d2504231c744..ff8e5dd7c75790 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/WithColor.h" #define DEBUG_TYPE "cg-data" @@ -30,6 +31,14 @@ cl::opt cl::opt CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, cl::desc("File path to where .cgdata file is read")); +cl::opt CodeGenDataThinLTOTwoRounds( +"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, +cl::desc("Enable two-round ThinLTO code generation. The first round " + "emits codegen data, while the second round uses the emitted " + "codegen data for further optimizations.")); + +// Path to where the optimized bitcodes are saved and restored for ThinLTO. +static SmallString<128> CodeGenDataThinLTOTwoRoundsPath; static std::string getCGDataErrString(cgdata_error Err, const std::string &ErrMsg = "") { @@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() { std::call_once(CodeGenData::OnceFlag, []() { Instance = std::unique_ptr(new CodeGenData()); -if (CodeGenDataGenerate) +if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) Instance->EmitCGData = true; else if (!CodeGenDataUsePath.empty()) { // Initialize the global CGData if the input file name is given. 
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) { } } +static std::string getPath(StringRef Dir, unsigned Task) { + return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str(); +} + +void initializeTwoCodegenRounds() { + assert(CodeGenDataThinLTOTwoRounds); + if (auto EC = llvm::sys::fs::createUniqueDirectory( + "cgdata", CodeGenDataThinLTOTwoRoundsPath)) +report_fatal_error(Twine("Failed to create directory: ") + EC.message()); +} + +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task); + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); + if (EC) +report_fatal_error(Twine("Failed to open ") + Path + + " to save optimized bitcode: " + EC.message()); + WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); +} + +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRou
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90933 >From d4ecc690fadc2787425b0b1b94f07ff7f981bdfe Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 13 Sep 2024 08:51:00 -0700 Subject: [PATCH 1/5] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds --- llvm/include/llvm/CGData/CodeGenData.h| 16 +++ llvm/lib/CGData/CodeGenData.cpp | 81 +- llvm/lib/LTO/CMakeLists.txt | 1 + llvm/lib/LTO/LTO.cpp | 103 +- llvm/lib/LTO/LTOBackend.cpp | 11 ++ .../test/ThinLTO/AArch64/cgdata-two-rounds.ll | 94 llvm/test/ThinLTO/AArch64/lit.local.cfg | 2 + 7 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll create mode 100644 llvm/test/ThinLTO/AArch64/lit.local.cfg diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 84133a433170fe..1e1afe99327650 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +/// Initialize the two-codegen rounds. +void initializeTwoCodegenRounds(); + +/// Save the current module before the first codegen round. +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task); + +/// Load the current module before the second codegen round. +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context); + +/// Merge the codegen data from the input files in scratch vector in ThinLTO +/// two-codegen rounds. 
+Error mergeCodeGenData( +const std::unique_ptr>> InputFiles); + void warn(Error E, StringRef Whence = ""); void warn(Twine Message, std::string Whence = "", std::string Hint = ""); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index 55d2504231c744..ff8e5dd7c75790 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/WithColor.h" #define DEBUG_TYPE "cg-data" @@ -30,6 +31,14 @@ cl::opt cl::opt CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, cl::desc("File path to where .cgdata file is read")); +cl::opt CodeGenDataThinLTOTwoRounds( +"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, +cl::desc("Enable two-round ThinLTO code generation. The first round " + "emits codegen data, while the second round uses the emitted " + "codegen data for further optimizations.")); + +// Path to where the optimized bitcodes are saved and restored for ThinLTO. +static SmallString<128> CodeGenDataThinLTOTwoRoundsPath; static std::string getCGDataErrString(cgdata_error Err, const std::string &ErrMsg = "") { @@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() { std::call_once(CodeGenData::OnceFlag, []() { Instance = std::unique_ptr(new CodeGenData()); -if (CodeGenDataGenerate) +if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) Instance->EmitCGData = true; else if (!CodeGenDataUsePath.empty()) { // Initialize the global CGData if the input file name is given. 
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) { } } +static std::string getPath(StringRef Dir, unsigned Task) { + return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str(); +} + +void initializeTwoCodegenRounds() { + assert(CodeGenDataThinLTOTwoRounds); + if (auto EC = llvm::sys::fs::createUniqueDirectory( + "cgdata", CodeGenDataThinLTOTwoRoundsPath)) +report_fatal_error(Twine("Failed to create directory: ") + EC.message()); +} + +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task); + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); + if (EC) +report_fatal_error(Twine("Failed to open ") + Path + + " to save optimized bitcode: " + EC.message()); + WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); +} + +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoun
[clang] [llvm] Thin6 (PR #110431)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/110431 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/5] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] Thin9 (PR #110483)
https://github.com/kyulee-com created https://github.com/llvm/llvm-project/pull/110483 None >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/7] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ 
-1473,7 +1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); +
[clang] [llvm] Thin6 (PR #110431)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/110431 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/5] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] Thin6 (PR #110431)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/110431 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/5] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] Thin6 (PR #110431)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/110431 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/5] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] Thin3 (PR #108614)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/108614 >From 1bbb7e5291bb59d95d8b308a90620a4d70e35152 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 13 Sep 2024 08:51:00 -0700 Subject: [PATCH 1/5] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds --- llvm/include/llvm/CGData/CodeGenData.h| 16 +++ llvm/lib/CGData/CodeGenData.cpp | 81 +- llvm/lib/LTO/CMakeLists.txt | 1 + llvm/lib/LTO/LTO.cpp | 103 +- llvm/lib/LTO/LTOBackend.cpp | 11 ++ .../test/ThinLTO/AArch64/cgdata-two-rounds.ll | 94 llvm/test/ThinLTO/AArch64/lit.local.cfg | 2 + 7 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll create mode 100644 llvm/test/ThinLTO/AArch64/lit.local.cfg diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 84133a433170fe..1e1afe99327650 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +/// Initialize the two-codegen rounds. +void initializeTwoCodegenRounds(); + +/// Save the current module before the first codegen round. +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task); + +/// Load the current module before the second codegen round. +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context); + +/// Merge the codegen data from the input files in scratch vector in ThinLTO +/// two-codegen rounds. 
+Error mergeCodeGenData( +const std::unique_ptr>> InputFiles); + void warn(Error E, StringRef Whence = ""); void warn(Twine Message, std::string Whence = "", std::string Hint = ""); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index 55d2504231c744..ff8e5dd7c75790 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/WithColor.h" #define DEBUG_TYPE "cg-data" @@ -30,6 +31,14 @@ cl::opt cl::opt CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, cl::desc("File path to where .cgdata file is read")); +cl::opt CodeGenDataThinLTOTwoRounds( +"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, +cl::desc("Enable two-round ThinLTO code generation. The first round " + "emits codegen data, while the second round uses the emitted " + "codegen data for further optimizations.")); + +// Path to where the optimized bitcodes are saved and restored for ThinLTO. +static SmallString<128> CodeGenDataThinLTOTwoRoundsPath; static std::string getCGDataErrString(cgdata_error Err, const std::string &ErrMsg = "") { @@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() { std::call_once(CodeGenData::OnceFlag, []() { Instance = std::unique_ptr(new CodeGenData()); -if (CodeGenDataGenerate) +if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) Instance->EmitCGData = true; else if (!CodeGenDataUsePath.empty()) { // Initialize the global CGData if the input file name is given. 
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) { } } +static std::string getPath(StringRef Dir, unsigned Task) { + return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str(); +} + +void initializeTwoCodegenRounds() { + assert(CodeGenDataThinLTOTwoRounds); + if (auto EC = llvm::sys::fs::createUniqueDirectory( + "cgdata", CodeGenDataThinLTOTwoRoundsPath)) +report_fatal_error(Twine("Failed to create directory: ") + EC.message()); +} + +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task); + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); + if (EC) +report_fatal_error(Twine("Failed to open ") + Path + + " to save optimized bitcode: " + EC.message()); + WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); +} + +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRou
[clang] [llvm] Thin2 (PR #106602)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/106602 >From d75c88f55a8e271ab8ad6c2a8ef3383dafdf8516 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Sun, 29 Sep 2024 18:28:15 -0700 Subject: [PATCH 1/5] [NFC] Refactor ThinBackend - Change it to a type from a function. - Store the parallelism in the type for the future use. --- llvm/include/llvm/LTO/LTO.h | 76 -- llvm/lib/LTO/LTO.cpp| 106 +++- 2 files changed, 104 insertions(+), 78 deletions(-) diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index 5c47c4df7f6a38..a6b9ede2da54e1 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -26,6 +26,7 @@ #include "llvm/Support/Caching.h" #include "llvm/Support/Error.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/ThreadPool.h" #include "llvm/Support/thread.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" @@ -105,7 +106,6 @@ void updateMemProfAttributes(Module &Mod, const ModuleSummaryIndex &Index); class LTO; struct SymbolResolution; -class ThinBackendProc; /// An input file. This is a symbol table wrapper that only exposes the /// information that an LTO client should need in order to do symbol resolution. @@ -194,13 +194,80 @@ class InputFile { } }; +using IndexWriteCallback = std::function; + +/// This class defines the interface to the ThinLTO backend. 
+class ThinBackendProc { +protected: + const Config &Conf; + ModuleSummaryIndex &CombinedIndex; + const DenseMap &ModuleToDefinedGVSummaries; + IndexWriteCallback OnWrite; + bool ShouldEmitImportsFiles; + DefaultThreadPool BackendThreadPool; + std::optional Err; + std::mutex ErrMu; + +public: + ThinBackendProc( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, + const DenseMap &ModuleToDefinedGVSummaries, + lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles, + ThreadPoolStrategy ThinLTOParallelism) + : Conf(Conf), CombinedIndex(CombinedIndex), +ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries), +OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles), +BackendThreadPool(ThinLTOParallelism) {} + + virtual ~ThinBackendProc() = default; + virtual Error start( + unsigned Task, BitcodeModule BM, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map &ResolvedODR, + MapVector &ModuleMap) = 0; + Error wait() { +BackendThreadPool.wait(); +if (Err) + return std::move(*Err); +return Error::success(); + } + unsigned getThreadCount() { return BackendThreadPool.getMaxConcurrency(); } + virtual bool isSensitiveToInputOrder() { return false; } + + // Write sharded indices and (optionally) imports to disk + Error emitFiles(const FunctionImporter::ImportMapTy &ImportList, + llvm::StringRef ModulePath, + const std::string &NewModulePath) const; +}; + /// A ThinBackend defines what happens after the thin-link phase during ThinLTO. /// The details of this type definition aren't important; clients can only /// create a ThinBackend using one of the create*ThinBackend() functions below. 
-using ThinBackend = std::function( +using ThinBackendFunction = std::function( const Config &C, ModuleSummaryIndex &CombinedIndex, -DenseMap &ModuleToDefinedGVSummaries, +const DenseMap &ModuleToDefinedGVSummaries, AddStreamFn AddStream, FileCache Cache)>; +struct ThinBackend { + ThinBackend(ThinBackendFunction Func, ThreadPoolStrategy Parallelism) + : Func(std::move(Func)), Parallelism(std::move(Parallelism)) {} + ThinBackend() = default; + + std::unique_ptr operator()( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, + const DenseMap &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, FileCache Cache) { +assert(isValid() && "Invalid backend function"); +return Func(Conf, CombinedIndex, ModuleToDefinedGVSummaries, +std::move(AddStream), std::move(Cache)); + } + ThreadPoolStrategy getParallelism() const { return Parallelism; } + bool isValid() const { return static_cast(Func); } + +private: + ThinBackendFunction Func = nullptr; + ThreadPoolStrategy Parallelism; +}; /// This ThinBackend runs the individual backend jobs in-process. /// The default value means to use one job per hardware core (not hyper-thread). @@ -210,7 +277,6 @@ using ThinBackend = std::function( /// to the same path as the input module, with suffix ".thinlto.bc" /// ShouldEmitImportsFiles is true it also writes a list of imported files to a /// similar path with ".imports" appended instead. -using IndexWriteCallback = std::function; ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism, IndexWriteCallback OnWrite =
[clang] [llvm] Thin3 (PR #108614)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/108614 >From 1cac61d42d7965a095aba3b47cca1f5e3c27fd82 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 13 Sep 2024 08:51:00 -0700 Subject: [PATCH 1/4] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds --- llvm/include/llvm/CGData/CodeGenData.h| 16 +++ llvm/lib/CGData/CodeGenData.cpp | 81 +- llvm/lib/LTO/CMakeLists.txt | 1 + llvm/lib/LTO/LTO.cpp | 103 +- llvm/lib/LTO/LTOBackend.cpp | 11 ++ .../test/ThinLTO/AArch64/cgdata-two-rounds.ll | 94 llvm/test/ThinLTO/AArch64/lit.local.cfg | 2 + 7 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll create mode 100644 llvm/test/ThinLTO/AArch64/lit.local.cfg diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 84133a433170fe..1e1afe99327650 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +/// Initialize the two-codegen rounds. +void initializeTwoCodegenRounds(); + +/// Save the current module before the first codegen round. +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task); + +/// Load the current module before the second codegen round. +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context); + +/// Merge the codegen data from the input files in scratch vector in ThinLTO +/// two-codegen rounds. 
+Error mergeCodeGenData( +const std::unique_ptr>> InputFiles); + void warn(Error E, StringRef Whence = ""); void warn(Twine Message, std::string Whence = "", std::string Hint = ""); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index 55d2504231c744..ff8e5dd7c75790 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/WithColor.h" #define DEBUG_TYPE "cg-data" @@ -30,6 +31,14 @@ cl::opt cl::opt CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, cl::desc("File path to where .cgdata file is read")); +cl::opt CodeGenDataThinLTOTwoRounds( +"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, +cl::desc("Enable two-round ThinLTO code generation. The first round " + "emits codegen data, while the second round uses the emitted " + "codegen data for further optimizations.")); + +// Path to where the optimized bitcodes are saved and restored for ThinLTO. +static SmallString<128> CodeGenDataThinLTOTwoRoundsPath; static std::string getCGDataErrString(cgdata_error Err, const std::string &ErrMsg = "") { @@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() { std::call_once(CodeGenData::OnceFlag, []() { Instance = std::unique_ptr(new CodeGenData()); -if (CodeGenDataGenerate) +if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) Instance->EmitCGData = true; else if (!CodeGenDataUsePath.empty()) { // Initialize the global CGData if the input file name is given. 
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) { } } +static std::string getPath(StringRef Dir, unsigned Task) { + return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str(); +} + +void initializeTwoCodegenRounds() { + assert(CodeGenDataThinLTOTwoRounds); + if (auto EC = llvm::sys::fs::createUniqueDirectory( + "cgdata", CodeGenDataThinLTOTwoRoundsPath)) +report_fatal_error(Twine("Failed to create directory: ") + EC.message()); +} + +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task); + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); + if (EC) +report_fatal_error(Twine("Failed to open ") + Path + + " to save optimized bitcode: " + EC.message()); + WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); +} + +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRou
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
kyulee-com wrote: @teresajohnson Do you have any concern or comment on this direction? https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Thin6 (PR #110431)
https://github.com/kyulee-com created https://github.com/llvm/llvm-project/pull/110431 None >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/5] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ 
-1473,7 +1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); +
[clang] [llvm] Thin6 (PR #110431)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/110431 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/5] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] Thin6 (PR #110431)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/110431 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/5] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
@@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +/// Initialize the two-codegen rounds. +void initializeTwoCodegenRounds(); + +/// Save the current module before the first codegen round. +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task); kyulee-com wrote: > What is `Task`? Some way to disambiguate modules? Added a comment to the function. https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
kyulee-com wrote: > Oh I just saw that you have dependent changes in #90934. I think when you > create your PR, you can specify a base branch. If you select the branch for > #90934, I believe it won't show those changes in this PR. I'm not sure if you > can do that after you've created the PR, though. Yeah, I should've created a PR directly on the remote branch instead of on my fork. https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
kyulee-com wrote: Could someone please take another look? Thanks! https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
kyulee-com wrote: > * Looking at the NFC, this seems like it has very similar issues to > Propeller, which wants to redo just the codegen with a new injected profile > and BB ordering. It would be good to see if we can converge to similar > approaches. I asked @rlavaee to take a look and he is reading through the > background on this work. @rlavaee do you think Propeller could use a similar > approach to this where it saves the pre-codegen bitcode and re-loads it > instead of redoing opt? This isn't necessarily an action item for this PR, > but I wanted Rahman to take a look since he is more familiar with codegen. It's interesting to know that Propeller wants to redo the codegen. I'm happy to align with this work. We've already started discussing this and have shared some details from our side. Here's the link for more info: https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-2-thinlto-nolto/78753/11?u=kyulee-com. > * I think this should be doable to implement with distributed ThinLTO if we > want in the future, from what I can tell. But we'll need a way to save the > bitcode before codegen from a non-in-process backend (e.g. the thinBackend > invoked from clang). Not asking you to do anything for this PR on that, but > just thinking through it. Seems doable... I think technically it's doable, but initially, I believed we did not want to repeat the code generation with distributed ThinLTO unless we were willing to introduce additional synchronization and spawn distributed ThinLTO backends again with the merged codegen data. If we aim to leverage the saved optimized IR, I suppose we need to assign the same backend work to the same machine used in the first run. As commented above in the link, I wanted to confine repeating the codegen to a single machine (in-process backend). I mainly wanted to separate the writer/reader builds for the use of distributed ThinLTO builds to avoid this complication while allowing some stale codegen data. 
However, it's certainly worth experimenting with. > * My biggest concern about this patch as written is whether it will break > down under LTO's caching mechanism - have you tested it with caching? It > seems like it might just skip the backend completely since you aren't adding > anything to the cache key. That's a good catch! Assuming this is not a common scenario (as mentioned in the RFC linked above), my initial design intentionally disables LTO's caching for correctness and simplicity by setting an empty `FileCache()` in the constructor for both the first and second round backends. To enable proper caching, we need two additional caches/streams, in addition to one for the final object output: one for the scratch object files and another for the optimized IR files. I've managed to implement this with some refactoring, the details of which will follow. https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com edited https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90933 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/7] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
kyulee-com wrote: @teresajohnson Here is the summary for the latest commit. Sorry about a few more dependent PRs whose commits also appear in this PR. - Refactored `ThinBackend`, which was also a function, but is now a type. It's set up when an LTO object is created by the linker. I can now store the original parallelism of `ThinBackend` so that I could inherit this value for new ThinBackends (for first and second round runs). https://github.com/llvm/llvm-project/pull/110461 - Refactored `FileCache`, which was a function, but is now a type. It's set up by the linker, so it's hard to get the original cache directory. Alternatively, I could create a separate flag to cache these intermediate data for a two-round run only. Instead, I inherited this folder from the original `FileCache`, which is handy as the user doesn't need to specify anything additionally. https://github.com/llvm/llvm-project/pull/110463 - Added two sets of streams and caches backing them (in `StreamCacheData`): - One for scratch object files from the first round. - One for optimized IR files from the first round. - Added `ExtraID` for `computeLTOCacheKey()`. This extra field is used to create distinct keys, as we now have three sets of streams and caches for potentially the same input bitcode. In addition to the two mentioned above (created in this LLVM pass), the last one has already been configured by the linker to produce the resulting object files for the linker. - Removed file operations to access IR files. Like object files (either scratch or final), all data are backed by buffers. An explicit streamer/buffer is passed to `thinBackend` to access these IR files instead of relying on a global flag. https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
@@ -1558,6 +1562,60 @@ class InProcessThinBackend : public ThinBackendProc { return BackendThreadPool.getMaxConcurrency(); } }; + +/// This Backend will run ThinBackend process but throw away all the output from +/// the codegen. This class facilitates the first codegen round. +class NoOutputThinBackend : public InProcessThinBackend { kyulee-com wrote: > Lastly just an idea: We could just hold the optimized bitcode in a similar > buffer in memory, rather than writing them to disk and reading them again > between rounds. Might run a bit faster. That's a great point! I've been cautious about peak memory usage, especially with large app binaries. Since the linker already buffers the resulting object files, this isn't a new concern. It's worth noting that the buffer from the first round gets discarded right before the second round, so we effectively only hold a buffer for the resulting object files. As for the optimized bitcode files, which are usually much larger than object files, I chose to write them to disk instead of keeping everything in memory. For smaller app binaries, buffering the optimized bitcode could be beneficial, as you suggested. I think we can always revisit and potentially add this as an option if needed. https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) { } } +static std::string getPath(StringRef Dir, unsigned Task) { + return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str(); kyulee-com wrote: > Will this work on windows? Can we use `llvm::sys::path`? Thanks for the catch! https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
@@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +/// Initialize the two-codegen rounds. +void initializeTwoCodegenRounds(); + +/// Save the current module before the first codegen round. +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task); + +/// Load the current module before the second codegen round. +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context); + +/// Merge the codegen data from the input files in scratch vector in ThinLTO +/// two-codegen rounds. +Error mergeCodeGenData( +const std::unique_ptr>> InputFiles); kyulee-com wrote: > This is similar to a constant pointer. Can we use `ArrayRef` instead somehow? The function `mergeCodeGenData` takes ownership of `InputFiles`. Once the function returns, the scratch buffer for the produced object files will be destroyed. I think this behavior is different from that of `ArrayRef`, which provides a read-only view into a vector. https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90934 >From c1a0219457a3c162d7fa6b9d70750ba7a040d9f2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/3] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7fa69420298160..a1909d45b4d944 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1286,10 +1286,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] [CGData][ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90934 >From 0378aa86d0ad24069c191250d76bc0f3800eb7ae Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 26 Apr 2024 20:02:52 -0700 Subject: [PATCH 1/3] [ThinLTO][NFC] Prep for two-codegen rounds --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++-- llvm/include/llvm/LTO/LTOBackend.h | 1 + llvm/lib/LTO/LTO.cpp | 75 -- llvm/lib/LTO/LTOBackend.cpp| 6 ++- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 62c6a57e8b7c80..abc936f2c686dd 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1321,10 +1321,10 @@ static void runThinLTOBackend( Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = - thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], - /* ModuleMap */ nullptr, CGOpts.CmdArgs)) { + if (Error E = thinBackend( + Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index de89f4bb10dff2..8516398510d4b8 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -56,6 +56,7 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, MapVector *ModuleMap, + bool CodeGenOnly, const std::vector &CmdArgs = std::vector()); Error finalizeOptimizationRemarks( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index a88124dacfaefd..f4c25f80811a85 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1473,7 
+1473,8 @@ class InProcessThinBackend : public ThinBackendProc { return MOrErr.takeError(); return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, - ImportList, DefinedGlobals, &ModuleMap); + ImportList, DefinedGlobals, &ModuleMap, + Conf.CodeGenOnly); }; auto ModuleID = BM.getModuleIdentifier(); @@ -1839,45 +1840,49 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, TimeTraceScopeExit.release(); - std::unique_ptr BackendProc = - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - AddStream, Cache); - auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - auto ProcessOneModule = [&](int I) -> Error { -auto &Mod = *(ModuleMap.begin() + I); -// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for -// combined module and parallel code generation partitions. -return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, - Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], ResolvedODR[Mod.first], - ThinLTO.ModuleMap); + auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { +auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProcess->start( + RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, + ImportLists[Mod.first], ExportLists[Mod.first], + ResolvedODR[Mod.first], ThinLTO.ModuleMap); +}; + +if (BackendProcess->getThreadCount() == 1) { + // Process the modules in the order they were provided on the + // command-line. It is important for this codepath to be used for + // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists + // ThinLTO objects in the same order as the inputs, which otherwise would + // affect the final link order. 
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I) +if (Error E = ProcessOneModule(I)) + return E; +} else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` + // (out of 100 sec). + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for
[clang] [llvm] [CGData][ThinLTO][NFC] Prep for two-codegen rounds (PR #90934)
https://github.com/kyulee-com closed https://github.com/llvm/llvm-project/pull/90934 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/90933 >From 4344f540008d4fd079bb009318b5b0b070bec0f8 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 13 Sep 2024 08:51:00 -0700 Subject: [PATCH 1/6] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds --- llvm/include/llvm/CGData/CodeGenData.h| 16 +++ llvm/lib/CGData/CodeGenData.cpp | 81 +- llvm/lib/LTO/CMakeLists.txt | 1 + llvm/lib/LTO/LTO.cpp | 103 +- llvm/lib/LTO/LTOBackend.cpp | 11 ++ .../test/ThinLTO/AArch64/cgdata-two-rounds.ll | 94 llvm/test/ThinLTO/AArch64/lit.local.cfg | 2 + 7 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll create mode 100644 llvm/test/ThinLTO/AArch64/lit.local.cfg diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 84133a433170fe..1e1afe99327650 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +/// Initialize the two-codegen rounds. +void initializeTwoCodegenRounds(); + +/// Save the current module before the first codegen round. +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task); + +/// Load the current module before the second codegen round. +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context); + +/// Merge the codegen data from the input files in scratch vector in ThinLTO +/// two-codegen rounds. 
+Error mergeCodeGenData( +const std::unique_ptr>> InputFiles); + void warn(Error E, StringRef Whence = ""); void warn(Twine Message, std::string Whence = "", std::string Hint = ""); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index 55d2504231c744..ff8e5dd7c75790 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/WithColor.h" #define DEBUG_TYPE "cg-data" @@ -30,6 +31,14 @@ cl::opt cl::opt CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, cl::desc("File path to where .cgdata file is read")); +cl::opt CodeGenDataThinLTOTwoRounds( +"codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, +cl::desc("Enable two-round ThinLTO code generation. The first round " + "emits codegen data, while the second round uses the emitted " + "codegen data for further optimizations.")); + +// Path to where the optimized bitcodes are saved and restored for ThinLTO. +static SmallString<128> CodeGenDataThinLTOTwoRoundsPath; static std::string getCGDataErrString(cgdata_error Err, const std::string &ErrMsg = "") { @@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() { std::call_once(CodeGenData::OnceFlag, []() { Instance = std::unique_ptr(new CodeGenData()); -if (CodeGenDataGenerate) +if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) Instance->EmitCGData = true; else if (!CodeGenDataUsePath.empty()) { // Initialize the global CGData if the input file name is given. 
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) { } } +static std::string getPath(StringRef Dir, unsigned Task) { + return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str(); +} + +void initializeTwoCodegenRounds() { + assert(CodeGenDataThinLTOTwoRounds); + if (auto EC = llvm::sys::fs::createUniqueDirectory( + "cgdata", CodeGenDataThinLTOTwoRoundsPath)) +report_fatal_error(Twine("Failed to create directory: ") + EC.message()); +} + +void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task); + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); + if (EC) +report_fatal_error(Twine("Failed to open ") + Path + + " to save optimized bitcode: " + EC.message()); + WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); +} + +std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule, + unsigned Task, + LLVMContext &Context) { + assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath)); + std::string Path = getPath(CodeGenDataThinLTOTwoRoun
[clang] [llvm] [CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds (PR #90933)
kyulee-com wrote: > IIUC, we must use `-codegen-data-generate` and `-codegen-data-use` in the > profiled and post-link build, respectively, whereas they are done in the same > build here. @rlavaee It is not strictly necessary to run both `-codegen-data-generate` and `-codegen-data-use` for each profile and post-link build. Running `-codegen-data-use` only in the post-link build (using the same codegen data file saved from the profile build) should be fine, although it might cost some size efficiency. https://github.com/llvm/llvm-project/pull/90933 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits