https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95821
>From 9452bd574023a7aef75b609d36e0ffac68e1e03d Mon Sep 17 00:00:00 2001 From: Sayhaan Siddiqui <sayh...@meta.com> Date: Mon, 17 Jun 2024 11:11:07 -0700 Subject: [PATCH 01/21] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20?= =?UTF-8?q?changes=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 [skip ci] --- bolt/include/bolt/Rewrite/DWARFRewriter.h | 4 +- bolt/lib/Core/BinaryEmitter.cpp | 1 + bolt/lib/Rewrite/DWARFRewriter.cpp | 61 ++--- clang/include/clang/Driver/Options.td | 4 + clang/lib/Driver/ToolChains/Gnu.cpp | 29 +++ cross-project-tests/lit.cfg.py | 14 +- cross-project-tests/lit.site.cfg.py.in | 4 + lldb/test/API/lit.cfg.py | 5 + lldb/test/API/lit.site.cfg.py.in | 8 + lldb/test/Shell/helper/toolchain.py | 5 + lldb/test/Shell/lit.site.cfg.py.in | 9 + llvm/CMakeLists.txt | 4 + llvm/include/llvm/MC/MCFragment.h | 22 ++ llvm/include/llvm/MC/MCObjectStreamer.h | 2 + llvm/include/llvm/MC/MCStreamer.h | 6 + llvm/lib/MC/MCAssembler.cpp | 118 ++++++---- llvm/lib/MC/MCExpr.cpp | 10 +- llvm/lib/MC/MCFragment.cpp | 12 + llvm/lib/MC/MCObjectStreamer.cpp | 5 + llvm/lib/MC/MCStreamer.cpp | 2 + .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 24 ++ llvm/test/MC/X86/directive-avoid_end_align.s | 208 ++++++++++++++++++ 22 files changed, 483 insertions(+), 74 deletions(-) create mode 100644 llvm/test/MC/X86/directive-avoid_end_align.s diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h index 8dec32de9008e..3cc9d823c815b 100644 --- a/bolt/include/bolt/Rewrite/DWARFRewriter.h +++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h @@ -12,6 +12,7 @@ #include "bolt/Core/DIEBuilder.h" #include "bolt/Core/DebugData.h" #include "bolt/Core/DebugNames.h" +#include "bolt/Core/GDBIndex.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DIE.h" #include "llvm/DWP/DWP.h" @@ -131,7 +132,8 @@ class DWARFRewriter { makeFinalLocListsSection(DWARFVersion Version); /// Finalize type sections in the main binary. - CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer); + CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer, + GDBIndex &GDBIndexSection); /// Process and write out CUs that are passsed in. void finalizeCompileUnits(DIEBuilder &DIEBlder, DIEStreamer &Streamer, diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 5793963f9b80d..c231fffa0d5ff 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -487,6 +487,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, // This assumes the second instruction in the macro-op pair will get // assigned to its own MCRelaxableFragment. Since all JCC instructions // are relaxable, we should be safe. + Streamer.emitNeverAlignCodeAtEnd(/*Alignment to avoid=*/64, *BC.STI); } if (!EmitCodeOnly) { diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 8814ebbd10aa5..7b62999dfb2b6 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -185,6 +185,7 @@ namespace bolt { class DIEStreamer : public DwarfStreamer { DIEBuilder *DIEBldr; DWARFRewriter &Rewriter; + GDBIndex &GDBIndexSection; private: /// Emit the compilation unit header for \p Unit in the debug_info @@ -247,7 +248,7 @@ class DIEStreamer : public DwarfStreamer { const uint64_t TypeSignature = cast<DWARFTypeUnit>(Unit).getTypeHash(); DIE *TypeDIE = DIEBldr->getTypeDIE(Unit); const DIEBuilder::DWARFUnitInfo &UI = DIEBldr->getUnitInfoByDwarfUnit(Unit); - Rewriter.addGDBTypeUnitEntry( + GDBIndexSection.addGDBTypeUnitEntry( {UI.UnitOffset, TypeSignature, TypeDIE->getOffset()}); if (Unit.getVersion() < 5) { // Switch the section to .debug_types section. @@ -279,11 +280,12 @@ class DIEStreamer : public DwarfStreamer { public: DIEStreamer(DIEBuilder *DIEBldr, DWARFRewriter &Rewriter, + GDBIndex &GDBIndexSection, DWARFLinkerBase::OutputFileType OutFileType, raw_pwrite_stream &OutFile, DWARFLinkerBase::MessageHandlerTy Warning) : DwarfStreamer(OutFileType, OutFile, Warning), DIEBldr(DIEBldr), - Rewriter(Rewriter){}; + Rewriter(Rewriter), GDBIndexSection(GDBIndexSection) {}; using DwarfStreamer::emitCompileUnitHeader; @@ -326,12 +328,11 @@ static cl::opt<bool> KeepARanges( "keep or generate .debug_aranges section if .gdb_index is written"), cl::Hidden, cl::cat(BoltCategory)); -static cl::opt<bool> -DeterministicDebugInfo("deterministic-debuginfo", - cl::desc("disables parallel execution of tasks that may produce " - "nondeterministic debug info"), - cl::init(true), - cl::cat(BoltCategory)); +static cl::opt<bool> DeterministicDebugInfo( + "deterministic-debuginfo", + cl::desc("disables parallel execution of tasks that may produce " + "nondeterministic debug info"), + cl::init(true), cl::cat(BoltCategory)); static cl::opt<std::string> DwarfOutputPath( "dwarf-output-path", @@ -460,10 +461,11 @@ static std::optional<uint64_t> getAsAddress(const DWARFUnit &DU, static std::unique_ptr<DIEStreamer> createDIEStreamer(const Triple &TheTriple, raw_pwrite_stream &OutFile, StringRef Swift5ReflectionSegmentName, DIEBuilder &DIEBldr, - DWARFRewriter &Rewriter) { + DWARFRewriter &Rewriter, GDBIndex &GDBIndexSection) { std::unique_ptr<DIEStreamer> Streamer = std::make_unique<DIEStreamer>( - &DIEBldr, Rewriter, DWARFLinkerBase::OutputFileType::Object, OutFile, + &DIEBldr, Rewriter, GDBIndexSection, + DWARFLinkerBase::OutputFileType::Object, OutFile, [&](const Twine &Warning, StringRef Context, const DWARFDie *) {}); Error Err = Streamer->init(TheTriple, Swift5ReflectionSegmentName); if (Err) @@ -484,13 +486,12 @@ emitUnit(DIEBuilder &DIEBldr, DIEStreamer &Streamer, DWARFUnit &Unit) { return {U.UnitOffset, U.UnitLength, TypeHash}; } -static void emitDWOBuilder(const std::string &DWOName, - DIEBuilder &DWODIEBuilder, DWARFRewriter &Rewriter, - DWARFUnit &SplitCU, DWARFUnit &CU, - DWARFRewriter::DWPState &State, - DebugLocWriter &LocWriter, - DebugStrOffsetsWriter &StrOffstsWriter, - DebugStrWriter &StrWriter) { +static void +emitDWOBuilder(const std::string &DWOName, DIEBuilder &DWODIEBuilder, + DWARFRewriter &Rewriter, DWARFUnit &SplitCU, DWARFUnit &CU, + DWARFRewriter::DWPState &State, DebugLocWriter &LocWriter, + DebugStrOffsetsWriter &StrOffstsWriter, + DebugStrWriter &StrWriter, GDBIndex &GDBIndexSection) { // Populate debug_info and debug_abbrev for current dwo into StringRef. DWODIEBuilder.generateAbbrevs(); DWODIEBuilder.finish(); @@ -500,8 +501,9 @@ static void emitDWOBuilder(const std::string &DWOName, std::make_shared<raw_svector_ostream>(OutBuffer); const object::ObjectFile *File = SplitCU.getContext().getDWARFObj().getFile(); auto TheTriple = std::make_unique<Triple>(File->makeTriple()); - std::unique_ptr<DIEStreamer> Streamer = createDIEStreamer( - *TheTriple, *ObjOS, "DwoStreamerInitAug2", DWODIEBuilder, Rewriter); + std::unique_ptr<DIEStreamer> Streamer = + createDIEStreamer(*TheTriple, *ObjOS, "DwoStreamerInitAug2", + DWODIEBuilder, Rewriter, GDBIndexSection); DWARFRewriter::UnitMetaVectorType TUMetaVector; DWARFRewriter::UnitMeta CUMI = {0, 0, 0}; if (SplitCU.getContext().getMaxDWOVersion() >= 5) { @@ -652,6 +654,7 @@ void DWARFRewriter::updateDebugInfo() { DWARF5AcceleratorTable DebugNamesTable(opts::CreateDebugNames, BC, *StrWriter); + GDBIndex GDBIndexSection(BC); DWPState State; if (opts::WriteDWP) initDWPState(State); @@ -704,7 +707,8 @@ void DWARFRewriter::updateDebugInfo() { TempRangesSectionWriter->finalizeSection(); emitDWOBuilder(DWOName, DWODIEBuilder, *this, **SplitCU, *Unit, State, - DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter); + DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter, + GDBIndexSection); } if (Unit->getVersion() >= 5) { @@ -729,9 +733,10 @@ void DWARFRewriter::updateDebugInfo() { std::make_unique<raw_svector_ostream>(OutBuffer); const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile(); auto TheTriple = std::make_unique<Triple>(File->makeTriple()); - std::unique_ptr<DIEStreamer> Streamer = - createDIEStreamer(*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this); - CUOffsetMap OffsetMap = finalizeTypeSections(DIEBlder, *Streamer); + std::unique_ptr<DIEStreamer> Streamer = createDIEStreamer( + *TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this, GDBIndexSection); + CUOffsetMap OffsetMap = + finalizeTypeSections(DIEBlder, *Streamer, GDBIndexSection); const bool SingleThreadedMode = opts::NoThreads || opts::DeterministicDebugInfo; @@ -761,7 +766,8 @@ void DWARFRewriter::updateDebugInfo() { finalizeDebugSections(DIEBlder, DebugNamesTable, *Streamer, *ObjOS, OffsetMap); - updateGdbIndexSection(OffsetMap, CUIndex); + GDBIndexSection.updateGdbIndexSection(OffsetMap, CUIndex, + *ARangesSectionWriter); } void DWARFRewriter::updateUnitDebugInfo( @@ -1429,7 +1435,8 @@ void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) { } CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder, - DIEStreamer &Streamer) { + DIEStreamer &Streamer, + GDBIndex &GDBIndexSection) { // update TypeUnit DW_AT_stmt_list with new .debug_line information. auto updateLineTable = [&](const DWARFUnit &Unit) -> void { DIE *UnitDIE = DIEBlder.getUnitDIEbyUnit(Unit); @@ -1449,8 +1456,8 @@ CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder, std::make_shared<raw_svector_ostream>(OutBuffer); const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile(); auto TheTriple = std::make_unique<Triple>(File->makeTriple()); - std::unique_ptr<DIEStreamer> TypeStreamer = - createDIEStreamer(*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this); + std::unique_ptr<DIEStreamer> TypeStreamer = createDIEStreamer( + *TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this, GDBIndexSection); // generate debug_info and CUMap CUOffsetMap CUMap; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d44faa55c456f..63bb86717bb14 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5483,6 +5483,10 @@ def pg : Flag<["-"], "pg">, HelpText<"Enable mcount instrumentation">, MarshallingInfoFlag<CodeGenOpts<"InstrumentForProfiling">>; def pipe : Flag<["-", "--"], "pipe">, HelpText<"Use pipes between commands, when possible">; +// Facebook T92898286 +def post_link_optimize : Flag<["--"], "post-link-optimize">, + HelpText<"Apply post-link optimizations using BOLT">; +// End Facebook T92898286 def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules">; def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index b141e5f2adfab..f7611af5763ab 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -672,12 +672,41 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } + // Facebook T92898286 + if (Args.hasArg(options::OPT_post_link_optimize)) + CmdArgs.push_back("-q"); + // End Facebook T92898286 + Args.AddAllArgs(CmdArgs, options::OPT_T); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, Output)); + // Facebook T92898286 + if (!Args.hasArg(options::OPT_post_link_optimize) || !Output.isFilename()) + return; + + const char *MvExec = Args.MakeArgString(ToolChain.GetProgramPath("mv")); + ArgStringList MoveCmdArgs; + MoveCmdArgs.push_back(Output.getFilename()); + const char *PreBoltBin = + Args.MakeArgString(Twine(Output.getFilename()) + ".pre-bolt"); + MoveCmdArgs.push_back(PreBoltBin); + C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), + MvExec, MoveCmdArgs, std::nullopt)); + + ArgStringList BoltCmdArgs; + const char *BoltExec = + Args.MakeArgString(ToolChain.GetProgramPath("llvm-bolt")); + BoltCmdArgs.push_back(PreBoltBin); + BoltCmdArgs.push_back("-reorder-blocks=reverse"); + BoltCmdArgs.push_back("-update-debug-sections"); + BoltCmdArgs.push_back("-o"); + BoltCmdArgs.push_back(Output.getFilename()); + C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), + BoltExec, BoltCmdArgs, std::nullopt)); + // End Facebook T92898286 } void tools::gnutools::Assembler::ConstructJob(Compilation &C, diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py index 774c4eaf4d976..619634578dfe6 100644 --- a/cross-project-tests/lit.cfg.py +++ b/cross-project-tests/lit.cfg.py @@ -84,7 +84,13 @@ def get_required_attr(config, attr_name): # use_clang() and use_lld() respectively, so set them to "", if needed. if not hasattr(config, "clang_src_dir"): config.clang_src_dir = "" -llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects)) +# Facebook T92898286 +should_test_bolt = get_required_attr(config, "llvm_test_bolt") +if should_test_bolt: + llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects), additional_flags=["--post-link-optimize"]) +else: + llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects)) +# End Facebook T92898286 if not hasattr(config, "lld_src_dir"): config.lld_src_dir = "" @@ -293,3 +299,9 @@ def get_clang_default_dwarf_version_string(triple): # Allow 'REQUIRES: XXX-registered-target' in tests. for arch in config.targets_to_build: config.available_features.add(arch.lower() + "-registered-target") + +# Facebook T92898286 +# Ensure the user's PYTHONPATH is included. +if "PYTHONPATH" in os.environ: + config.environment["PYTHONPATH"] = os.environ["PYTHONPATH"] +# End Facebook T92898286 diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in index 39458dfc79afd..2d53cd377f033 100644 --- a/cross-project-tests/lit.site.cfg.py.in +++ b/cross-project-tests/lit.site.cfg.py.in @@ -21,6 +21,10 @@ config.mlir_src_root = "@MLIR_SOURCE_DIR@" config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" +# Facebook T92898286 +config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") +# End Facebook T92898286 + import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index d934349fe3ca3..d4a62c51458cc 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -248,6 +248,11 @@ def delete_module_cache(path): if is_configured("lldb_framework_dir"): dotest_cmd += ["--framework", config.lldb_framework_dir] +# Facebook T92898286 +if is_configured("llvm_test_bolt"): + dotest_cmd += ["-E", '"--post-link-optimize"'] +# End Facebook T92898286 + if ( "lldb-repro-capture" in config.available_features or "lldb-repro-replay" in config.available_features diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index 8b2d09ae41cd2..602f45759e48f 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -1,5 +1,9 @@ @LIT_SITE_CFG_IN_HEADER@ +#Facebook T92898286 +import lit.util +#End Facebook T92898286 + config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") @@ -39,6 +43,10 @@ config.libcxx_include_target_dir = "@LIBCXX_GENERATED_INCLUDE_TARGET_DIR@" config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api") +# Facebook T92898286 +config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") +# End Facebook T92898286 + # Plugins lldb_build_intel_pt = '@LLDB_BUILD_INTEL_PT@' if lldb_build_intel_pt == '1': diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py index 255955fc70d8c..7b7be06643166 100644 --- a/lldb/test/Shell/helper/toolchain.py +++ b/lldb/test/Shell/helper/toolchain.py @@ -165,6 +165,11 @@ def use_support_substitutions(config): if config.cmake_sysroot: host_flags += ["--sysroot={}".format(config.cmake_sysroot)] + # Facebook T92898286 + if config.llvm_test_bolt: + host_flags += ["--post-link-optimize"] + # End Facebook T92898286 + host_flags = " ".join(host_flags) config.substitutions.append(("%clang_host", "%clang " + host_flags)) config.substitutions.append(("%clangxx_host", "%clangxx " + host_flags)) diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in index b69e7bce1bc0b..fe8323734b7db 100644 --- a/lldb/test/Shell/lit.site.cfg.py.in +++ b/lldb/test/Shell/lit.site.cfg.py.in @@ -1,5 +1,10 @@ @LIT_SITE_CFG_IN_HEADER@ +#Facebook T92898286 +import lit.util +#End Facebook T92898286 + + config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") @@ -31,6 +36,10 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell") +# Facebook T92898286 +config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") +# End Facebook T92898286 + import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 3208147101c0d..ecd36d2564e4f 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -709,6 +709,10 @@ set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH option(LLVM_USE_SPLIT_DWARF "Use -gsplit-dwarf when compiling llvm and --gdb-index when linking." OFF) +# Facebook T92898286 +option(LLVM_TEST_BOLT "Enable BOLT testing in non-BOLT tests that use clang" OFF) +# End Facebook T92898286 + # Define an option controlling whether we should build for 32-bit on 64-bit # platforms, where supported. if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX")) diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index a9b19dc56f16a..256d98423e030 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -33,6 +33,7 @@ class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> { public: enum FragmentType : uint8_t { FT_Align, + FT_NeverAlign, FT_Data, FT_CompactEncodedInst, FT_Fill, @@ -344,6 +345,27 @@ class MCAlignFragment : public MCFragment { } }; +class MCNeverAlignFragment : public MCFragment { + /// The alignment the end of the next fragment should avoid. + unsigned Alignment; + + /// When emitting Nops some subtargets have specific nop encodings. + const MCSubtargetInfo &STI; + +public: + MCNeverAlignFragment(unsigned Alignment, const MCSubtargetInfo &STI, + MCSection *Sec = nullptr) + : MCFragment(FT_NeverAlign, false, Sec), Alignment(Alignment), STI(STI) {} + + unsigned getAlignment() const { return Alignment; } + + const MCSubtargetInfo &getSubtargetInfo() const { return STI; } + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_NeverAlign; + } +}; + class MCFillFragment : public MCFragment { uint8_t ValueSize; /// Value to use for filling bytes. diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index e212d54613980..c7d760721e369 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -157,6 +157,8 @@ class MCObjectStreamer : public MCStreamer { unsigned MaxBytesToEmit = 0) override; void emitCodeAlignment(Align ByteAlignment, const MCSubtargetInfo *STI, unsigned MaxBytesToEmit = 0) override; + void emitNeverAlignCodeAtEnd(unsigned ByteAlignment, + const MCSubtargetInfo &STI) override; void emitValueToOffset(const MCExpr *Offset, unsigned char Value, SMLoc Loc) override; void emitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index b7468cf70a664..dd813192d9ca0 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -887,6 +887,12 @@ class MCStreamer { virtual void emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI, unsigned MaxBytesToEmit = 0); + /// If the end of the fragment following this NeverAlign fragment ever gets + /// aligned to \p ByteAlignment, this fragment emits a single nop before the + /// following fragment to break this end-alignment. + virtual void emitNeverAlignCodeAtEnd(unsigned ByteAlignment, + const MCSubtargetInfo &STI); + /// Emit some number of copies of \p Value until the byte offset \p /// Offset is reached. /// diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index ad30b5ce9e631..62baeb93ea7d0 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -298,6 +298,43 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, const MCFixup &Fixup, return IsResolved; } +/// Check if the branch crosses the boundary. +/// +/// \param StartAddr start address of the fused/unfused branch. +/// \param Size size of the fused/unfused branch. +/// \param BoundaryAlignment alignment requirement of the branch. +/// \returns true if the branch cross the boundary. +static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size, + Align BoundaryAlignment) { + uint64_t EndAddr = StartAddr + Size; + return (StartAddr >> Log2(BoundaryAlignment)) != + ((EndAddr - 1) >> Log2(BoundaryAlignment)); +} + +/// Check if the branch is against the boundary. +/// +/// \param StartAddr start address of the fused/unfused branch. +/// \param Size size of the fused/unfused branch. +/// \param BoundaryAlignment alignment requirement of the branch. +/// \returns true if the branch is against the boundary. +static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size, + Align BoundaryAlignment) { + uint64_t EndAddr = StartAddr + Size; + return (EndAddr & (BoundaryAlignment.value() - 1)) == 0; +} + +/// Check if the branch needs padding. +/// +/// \param StartAddr start address of the fused/unfused branch. +/// \param Size size of the fused/unfused branch. +/// \param BoundaryAlignment alignment requirement of the branch. +/// \returns true if the branch needs padding. +static bool needPadding(uint64_t StartAddr, uint64_t Size, + Align BoundaryAlignment) { + return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) || + isAgainstBoundary(StartAddr, Size, BoundaryAlignment); +} + uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, const MCFragment &F) const { assert(getBackendPtr() && "Requires assembler backend"); @@ -358,6 +395,41 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, return Size; } + case MCFragment::FT_NeverAlign: { + // Disclaimer: NeverAlign fragment size depends on the size of its immediate + // successor, but NeverAlign need not be a MCRelaxableFragment. + // NeverAlign fragment size is recomputed if the successor is relaxed: + // - If RelaxableFragment is relaxed, it gets invalidated by marking its + // predecessor as LastValidFragment. + // - This forces the assembler to call MCAsmLayout::layoutFragment on that + // relaxable fragment, which in turn will always ask the predecessor to + // compute its size (see "computeFragmentSize(prev)" in layoutFragment). + // + // In short, the simplest way to ensure that computeFragmentSize() is sane + // is to establish the following rule: it should never examine fragments + // after the current fragment in the section. If we logically need to + // examine any fragment after the current fragment, we need to do that using + // relaxation, inside MCAssembler::layoutSectionOnce. + const MCNeverAlignFragment &NAF = cast<MCNeverAlignFragment>(F); + const MCFragment *NF = F.getNextNode(); + uint64_t Offset = Layout.getFragmentOffset(&NAF); + size_t NextFragSize = 0; + if (const auto *NextFrag = dyn_cast<MCRelaxableFragment>(NF)) { + NextFragSize = NextFrag->getContents().size(); + } else if (const auto *NextFrag = dyn_cast<MCDataFragment>(NF)) { + NextFragSize = NextFrag->getContents().size(); + } else { + llvm_unreachable("Didn't find the expected fragment after NeverAlign"); + } + // Check if the next fragment ends at the alignment we want to avoid. + if (isAgainstBoundary(Offset, NextFragSize, Align(NAF.getAlignment()))) { + // Avoid this alignment by introducing minimum nop. + assert(getBackend().getMinimumNopSize() != NAF.getAlignment()); + return getBackend().getMinimumNopSize(); + } + return 0; + } + case MCFragment::FT_Org: { const MCOrgFragment &OF = cast<MCOrgFragment>(F); MCValue Value; @@ -581,6 +653,15 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm, break; } + case MCFragment::FT_NeverAlign: { + const MCNeverAlignFragment &NAF = cast<MCNeverAlignFragment>(F); + if (!Asm.getBackend().writeNopData(OS, FragmentSize, + &NAF.getSubtargetInfo())) + report_fatal_error("unable to write nop sequence of " + + Twine(FragmentSize) + " bytes"); + break; + } + case MCFragment::FT_Data: ++stats::EmittedDataFragments; OS << cast<MCDataFragment>(F).getContents(); @@ -1052,43 +1133,6 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { return OldSize != LF.getContents().size(); } -/// Check if the branch crosses the boundary. -/// -/// \param StartAddr start address of the fused/unfused branch. -/// \param Size size of the fused/unfused branch. -/// \param BoundaryAlignment alignment requirement of the branch. -/// \returns true if the branch cross the boundary. -static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size, - Align BoundaryAlignment) { - uint64_t EndAddr = StartAddr + Size; - return (StartAddr >> Log2(BoundaryAlignment)) != - ((EndAddr - 1) >> Log2(BoundaryAlignment)); -} - -/// Check if the branch is against the boundary. -/// -/// \param StartAddr start address of the fused/unfused branch. -/// \param Size size of the fused/unfused branch. -/// \param BoundaryAlignment alignment requirement of the branch. -/// \returns true if the branch is against the boundary. -static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size, - Align BoundaryAlignment) { - uint64_t EndAddr = StartAddr + Size; - return (EndAddr & (BoundaryAlignment.value() - 1)) == 0; -} - -/// Check if the branch needs padding. -/// -/// \param StartAddr start address of the fused/unfused branch. -/// \param Size size of the fused/unfused branch. -/// \param BoundaryAlignment alignment requirement of the branch. -/// \returns true if the branch needs padding. -static bool needPadding(uint64_t StartAddr, uint64_t Size, - Align BoundaryAlignment) { - return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) || - isAgainstBoundary(StartAddr, Size, BoundaryAlignment); -} - bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout, MCBoundaryAlignFragment &BF) { // BoundaryAlignFragment that doesn't need to align any fragment should not be diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index b065d03651c45..9add574b0e5b8 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -675,14 +675,8 @@ static void AttemptToFoldSymbolOffsetDifference( if (FA == FB) { Reverse = SA.getOffset() < SB.getOffset(); } else if (!isa<MCDummyFragment>(FA)) { - // Testing FA < FB is slow. Use setLayoutOrder to speed up computation. - // The formal layout order will be finalized in MCAssembler::layout. - if (FA->getLayoutOrder() == 0 || FB->getLayoutOrder()== 0) { - unsigned LayoutOrder = 0; - for (MCFragment &F : *FA->getParent()) - F.setLayoutOrder(++LayoutOrder); - } - Reverse = FA->getLayoutOrder() < FB->getLayoutOrder(); + Reverse = std::find_if(std::next(FA->getIterator()), SecA.end(), + [&](auto &I) { return &I == FB; }) != SecA.end(); } uint64_t SAOffset = SA.getOffset(), SBOffset = SB.getOffset(); diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp index 7d0826802d0af..6e09caaab0957 100644 --- a/llvm/lib/MC/MCFragment.cpp +++ b/llvm/lib/MC/MCFragment.cpp @@ -274,6 +274,9 @@ void MCFragment::destroy() { case FT_Align: delete cast<MCAlignFragment>(this); return; + case FT_NeverAlign: + delete cast<MCNeverAlignFragment>(this); + return; case FT_Data: delete cast<MCDataFragment>(this); return; @@ -342,6 +345,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() const { OS << "<"; switch (getKind()) { case MCFragment::FT_Align: OS << "MCAlignFragment"; break; + case MCFragment::FT_NeverAlign: + OS << "MCNeverAlignFragment"; + break; case MCFragment::FT_Data: OS << "MCDataFragment"; break; case MCFragment::FT_CompactEncodedInst: OS << "MCCompactEncodedInstFragment"; break; @@ -381,6 +387,12 @@ LLVM_DUMP_METHOD void MCFragment::dump() const { << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">"; break; } + case MCFragment::FT_NeverAlign: { + const MCNeverAlignFragment *NAF = cast<MCNeverAlignFragment>(this); + OS << "\n "; + OS << " Alignment:" << NAF->getAlignment() << ">"; + break; + } case MCFragment::FT_Data: { const auto *DF = cast<MCDataFragment>(this); OS << "\n "; diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 0ccade91677a4..117475b7dd90b 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -658,6 +658,11 @@ void MCObjectStreamer::emitCodeAlignment(Align Alignment, cast<MCAlignFragment>(getCurrentFragment())->setEmitNops(true, STI); } +void MCObjectStreamer::emitNeverAlignCodeAtEnd(unsigned ByteAlignment, + const MCSubtargetInfo &STI) { + insert(new MCNeverAlignFragment(ByteAlignment, STI)); +} + void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value, SMLoc Loc) { diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 199d865ea3496..a97cba6c89972 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -1235,6 +1235,8 @@ void MCStreamer::emitValueToAlignment(Align Alignment, int64_t Value, unsigned MaxBytesToEmit) {} void MCStreamer::emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI, unsigned MaxBytesToEmit) {} +void MCStreamer::emitNeverAlignCodeAtEnd(unsigned ByteAlignment, + const MCSubtargetInfo &STI) {} void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value, SMLoc Loc) {} void MCStreamer::emitBundleAlignMode(Align Alignment) {} diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 6623106109316..6c6bd2cf31e86 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1153,6 +1153,7 @@ class X86AsmParser : public MCTargetAsmParser { bool parseDirectiveArch(); bool parseDirectiveNops(SMLoc L); bool parseDirectiveEven(SMLoc L); + bool parseDirectiveAvoidEndAlign(SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); /// CodeView FPO data directives. @@ -4601,6 +4602,8 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return false; } else if (IDVal == ".nops") return parseDirectiveNops(DirectiveID.getLoc()); + else if (IDVal == ".avoid_end_align") + return parseDirectiveAvoidEndAlign(DirectiveID.getLoc()); else if (IDVal == ".even") return parseDirectiveEven(DirectiveID.getLoc()); else if (IDVal == ".cv_fpo_proc") @@ -4695,6 +4698,27 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) { return false; } +/// Directive for NeverAlign fragment testing, not for general usage! +/// parseDirectiveAvoidEndAlign +/// ::= .avoid_end_align alignment +bool X86AsmParser::parseDirectiveAvoidEndAlign(SMLoc L) { + int64_t Alignment = 0; + SMLoc AlignmentLoc; + AlignmentLoc = getTok().getLoc(); + if (getParser().checkForValidSection() || + getParser().parseAbsoluteExpression(Alignment)) + return true; + + if (getParser().parseEOL("unexpected token in directive")) + return true; + + if (Alignment <= 0) + return Error(AlignmentLoc, "expected a positive alignment"); + + getParser().getStreamer().emitNeverAlignCodeAtEnd(Alignment, getSTI()); + return false; +} + /// ParseDirectiveCode /// ::= .code16 | .code32 | .code64 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { diff --git a/llvm/test/MC/X86/directive-avoid_end_align.s b/llvm/test/MC/X86/directive-avoid_end_align.s new file mode 100644 index 0000000000000..1d748401edc12 --- /dev/null +++ b/llvm/test/MC/X86/directive-avoid_end_align.s @@ -0,0 +1,208 @@ +# RUN: llvm-mc -triple=x86_64 -filetype=obj %s | llvm-objdump --no-show-raw-insn -d - | FileCheck %s +# RUN: not llvm-mc -triple=x86_64 --defsym ERR=1 %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR + +# avoid_end_align has no effect since test doesn't end at alignment boundary: +.avoid_end_align 64 +# CHECK-NOT: nop + testl %eax, %eax +# CHECK: testl %eax, %eax + je .LBB0 + +.fill 58, 1, 0x00 +# NeverAlign followed by MCDataFragment: +# avoid_end_align inserts nop because `test` would end at alignment boundary: +.avoid_end_align 64 +# CHECK: 3e: nop + testl %eax, %eax +# CHECK-NEXT: 3f: testl %eax, %eax + je .LBB0 +# CHECK-NEXT: 41: je +.LBB0: + retq + +.p2align 6 +.L0: +.nops 57 + int3 +# NeverAlign followed by RelaxableFragment: +.avoid_end_align 64 +# CHECK: ba: nop + cmpl $(.L1-.L0), %eax +# CHECK-NEXT: bb: cmpl + je .L0 +# CHECK-NEXT: c1: je +.nops 65 +.L1: + +############################################################################### +# Experiment A: +# Check that NeverAlign doesn't introduce infinite loops in layout. +# Control: +# 1. NeverAlign fragment is not added, +# 2. Short formats of cmp and jcc are used (3 and 2 bytes respectively), +# 3. cmp and jcc are placed such that to be split by 64B alignment boundary. +# 4. jcc would be relaxed to a longer format if at least one byte is added +# between .L10 and je itself, e.g. by adding a NeverAlign padding byte, +# or relaxing cmp instruction. +# 5. cmp would be relaxed to a longer format if at least one byte is added +# between .L11 and .L12, e.g. due to relaxing jcc instruction. +.p2align 6 +# CHECK: 140: int3 +.fill 2, 1, 0xcc +.L10: +.nops 122 + int3 +# CHECK: 1bc: int3 +# no avoid_end_align here +# CHECK-NOT: nop + cmp $(.L12-.L11), %eax +# CHECK: 1bd: cmpl +.L11: + je .L10 +# CHECK-NEXT: 1c0: je +.nops 125 +.L12: + +# Experiment: +# Same setup as control, except NeverAlign fragment is added before cmp. +# Expected effect: +# 1. NeverAlign pads cmp+jcc by one byte since cmp and jcc are split by a 64B +# alignment boundary, +# 2. This extra byte forces jcc relaxation to a longer format (Control rule #4), +# 3. This results in an cmp relaxation (Control rule #5), +# 4. Which in turn makes NeverAlign fragment unnecessary as cmp and jcc +# are no longer split by an alignment boundary (cmp crosses the boundary). +# 5. NeverAlign padding is removed. +# 6. cmp and jcc instruction remain in relaxed form. +# 7. Relaxation converges, layout succeeds. +.p2align 6 +# CHECK: 240: int3 +.fill 2, 1, 0xcc +.L20: +.nops 122 + int3 +# CHECK: 2bc: int3 +.avoid_end_align 64 +# CHECK-NOT: nop + cmp $(.L22-.L21), %eax +# CHECK-NEXT: 2bd: cmpl +.L21: + je .L20 +# CHECK-NEXT: 2c3: je +.nops 125 +.L22: + +############################################################################### +# Experiment B: similar to exp A, but we check that once NeverAlign padding is +# removed from the layout (exp A, experiment step 5), the increased distance +# between the symbols L33 and L34 triggers the relaxation of instruction at +# label L32. +# +# Control 1: using a one-byte instruction at L33 (site of NeverAlign) leads to +# steps 2-3 of exp A, experiment: +# 2. This extra byte forces jcc relaxation to a longer format (Control rule #4), +# 3. This results in an cmp relaxation (Control rule #5), +# => short cmp under L32 +.p2align 6 +# CHECK: 380: int3 +.fill 2, 1, 0xcc +.L30: +.nops 122 + int3 +# CHECK: 3fc: int3 + hlt +#.avoid_end_align 64 +.L33: + cmp $(.L32-.L31), %eax +# CHECK: 3fe: cmpl +.L31: + je .L30 +# CHECK-NEXT: 404: je +.nops 114 +.p2align 1 + int3 + int3 +# CHECK: 47c: int3 +.L34: +.nops 9 +.L32: + cmp $(.L33-.L34), %eax +# CHECK: 487: cmp +# note that the size of cmp is 48a-487 == 3 bytes (distance is exactly -128) + int3 +# CHECK-NEXT: 48a: int3 + +# Control 2: leaving out a byte at L43 (site of NeverAlign), plus +# relaxed jcc and cmp leads to a relaxed cmp under L42 (-129 as cmp's immediate) +.p2align 6 +# CHECK: 4c0: int3 +.fill 2, 1, 0xcc +.L40: +.nops 122 + int3 +# CHECK: 53c: int3 +# int3 +#.avoid_end_align 64 +.L43: + cmp $(.L42-.L41+0x100), %eax +# CHECK: 53d: cmpl +.L41: + je .L40+0x100 +# CHECK-NEXT: 543: je +.nops 114 +.p2align 1 + int3 + int3 +# CHECK: 5bc: int3 +.L44: +.nops 9 +.L42: + cmp $(.L43-.L44), %eax +# CHECK: 5c7: cmp +# note that the size of cmp is 5cd-5c7 == 6 bytes (distance is exactly -129) + int3 +# CHECK-NEXT: 5cd: int3 + +# Experiment +# Checking if removing NeverAlign padding at L53 as a result of alignment and +# relaxation of cmp and jcc following it (see exp A), thus reproducing the case +# in Control 2 (getting a relaxed cmp under L52), is handled correctly. +.p2align 6 +# CHECK: 600: int3 +.fill 2, 1, 0xcc +.L50: +.nops 122 + int3 +# CHECK: 67c: int3 +.avoid_end_align 64 +.L53: +# CHECK-NOT: nop + cmp $(.L52-.L51), %eax +# CHECK-NEXT: 67d: cmpl +.L51: + je .L50 +# CHECK-NEXT: 683: je +.nops 114 +.p2align 1 + int3 + int3 +# CHECK: 6fc: int3 +.L54: +.nops 9 +.L52: + cmp $(.L53-.L54), %eax +# CHECK: 707: cmp +# note that the size of cmp is 70d-707 == 6 bytes (distance is exactly -129) + int3 +# CHECK-NEXT: 70d: int3 + +.ifdef ERR +# ERR: {{.*}}.s:[[#@LINE+1]]:17: error: unknown token in expression +.avoid_end_align +# ERR: {{.*}}.s:[[#@LINE+1]]:18: error: expected absolute expression +.avoid_end_align x +# ERR: {{.*}}.s:[[#@LINE+1]]:18: error: expected a positive alignment +.avoid_end_align 0 +# ERR: {{.*}}.s:[[#@LINE+1]]:20: error: unexpected token in directive +.avoid_end_align 64, 0 +.endif >From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Mon, 17 Jun 2024 15:39:02 -0700 Subject: [PATCH 02/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index f0fcb1c130002..2bca83c9d11ec 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); for (auto& [_, BF] : BC.getBinaryFunctions()) { + if (!ProfiledFunctions.count(&BF)) + continue; StrictBinaryFunctionHashes[BF.getHash()] = &BF; } >From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Mon, 17 Jun 2024 15:39:39 -0700 Subject: [PATCH 03/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 2bca83c9d11ec..56474a67307ed 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Uses the strict hash of profiled and binary functions to match functions // that are not matched by name or common name. - std::unordered_map<size_t, BinaryFunction*> StrictBinaryFunctionHashes; + std::unordered_map<size_t, BinaryFunction *> StrictBinaryFunctionHashes; StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); - for (auto& [_, BF] : BC.getBinaryFunctions()) { + for (auto &[_, BF] : BC.getBinaryFunctions()) { if (!ProfiledFunctions.count(&BF)) continue; StrictBinaryFunctionHashes[BF.getHash()] = &BF; @@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { for (auto YamlBF : YamlBP.Functions) { auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash); - if (It != StrictBinaryFunctionHashes.end() && !ProfiledFunctions.count(It->second)) { + if (It != StrictBinaryFunctionHashes.end() && + !ProfiledFunctions.count(It->second)) { auto *BF = It->second; matchProfileToFunction(YamlBF, *BF); } >From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Mon, 17 Jun 2024 15:55:58 -0700 Subject: [PATCH 04/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 +- bolt/test/X86/profile-passthrough-block.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 56474a67307ed..779d60bce3b66 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); for (auto &[_, BF] : BC.getBinaryFunctions()) { - if (!ProfiledFunctions.count(&BF)) + if (ProfiledFunctions.count(&BF)) continue; StrictBinaryFunctionHashes[BF.getHash()] = &BF; } diff --git a/bolt/test/X86/profile-passthrough-block.test b/bolt/test/X86/profile-passthrough-block.test index 1b875885260dc..ed2a8117ddfc4 100644 --- a/bolt/test/X86/profile-passthrough-block.test +++ b/bolt/test/X86/profile-passthrough-block.test @@ -57,7 +57,7 @@ header: functions: - name: main fid: 0 - hash: 0x0000000000000000 + hash: 0x0000000000000001 exec: 1 nblocks: 6 blocks: >From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Mon, 17 Jun 2024 15:58:22 -0700 Subject: [PATCH 05/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 779d60bce3b66..e3d30bfdb74e4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { } for (auto YamlBF : YamlBP.Functions) { + if (YamlBF.Used) + continue; auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash); if (It != StrictBinaryFunctionHashes.end() && !ProfiledFunctions.count(It->second)) { >From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Mon, 17 Jun 2024 16:00:27 -0700 Subject: [PATCH 06/21] spr amend Created using spr 1.3.4 --- bolt/test/X86/profile-passthrough-block.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/test/X86/profile-passthrough-block.test b/bolt/test/X86/profile-passthrough-block.test index ed2a8117ddfc4..1b875885260dc 100644 --- a/bolt/test/X86/profile-passthrough-block.test +++ b/bolt/test/X86/profile-passthrough-block.test @@ -57,7 +57,7 @@ header: functions: - name: main fid: 0 - hash: 0x0000000000000001 + hash: 0x0000000000000000 exec: 1 nblocks: 6 blocks: >From 99cf8918e945356d12b5d29fe8174c610f305559 Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Tue, 18 Jun 2024 14:19:45 -0700 Subject: [PATCH 07/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 51 +++++++------- .../X86/hashing-based-function-matching.test | 67 +++++++++++++++++++ 2 files changed, 93 insertions(+), 25 deletions(-) create mode 100644 bolt/test/X86/hashing-based-function-matching.test diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index e3d30bfdb74e4..78d46eea5c728 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -363,9 +363,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { return Profile.Hash == static_cast<uint64_t>(BF.getHash()); }; - // We have to do 2 passes since LTO introduces an ambiguity in function - // names. The first pass assigns profiles that match 100% by name and - // by hash. The second pass allows name ambiguity for LTO private functions. + // This first pass assigns profiles that match 100% by name and by hash. for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { if (!BF) continue; @@ -383,6 +381,30 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { matchProfileToFunction(YamlBF, Function); } + // Uses the strict hash of profiled and binary functions to match functions + // that are not matched by name or common name. + std::unordered_map<size_t, BinaryFunction *> StrictBinaryFunctionHashes; + StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); + + for (auto &[_, BF] : BC.getBinaryFunctions()) { + if (ProfiledFunctions.count(&BF)) + continue; + BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); + StrictBinaryFunctionHashes[BF.getHash()] = &BF; + } + + for (auto YamlBF : YamlBP.Functions) { + if (YamlBF.Used) + continue; + auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash); + if (It != StrictBinaryFunctionHashes.end() && + !ProfiledFunctions.count(It->second)) { + auto *BF = It->second; + matchProfileToFunction(YamlBF, *BF); + } + } + + // This second pass allows name ambiguity for LTO private functions. for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) { if (!LTOCommonNameFunctionMap.contains(CommonName)) continue; @@ -415,33 +437,12 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF)) matchProfileToFunction(YamlBF, *BF); - // Uses the strict hash of profiled and binary functions to match functions - // that are not matched by name or common name. - std::unordered_map<size_t, BinaryFunction *> StrictBinaryFunctionHashes; - StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); - - for (auto &[_, BF] : BC.getBinaryFunctions()) { - if (ProfiledFunctions.count(&BF)) - continue; - StrictBinaryFunctionHashes[BF.getHash()] = &BF; - } - - for (auto YamlBF : YamlBP.Functions) { - if (YamlBF.Used) - continue; - auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash); - if (It != StrictBinaryFunctionHashes.end() && - !ProfiledFunctions.count(It->second)) { - auto *BF = It->second; - matchProfileToFunction(YamlBF, *BF); - } - } - for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name << '\n'; + // Set for parseFunctionProfile(). NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions"); NormalizeByCalls = usesEvent("branches"); diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test new file mode 100644 index 0000000000000..3c2cd834f90bf --- /dev/null +++ b/bolt/test/X86/hashing-based-function-matching.test @@ -0,0 +1,67 @@ +## Test YAMLProfileReader support for pass-through blocks in non-matching edges: +## match the profile edge A -> C to the CFG with blocks A -> B -> C. + +# REQUIRES: system-linux +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=1 \ +# RUN: --print-cfg 2>&1 | FileCheck %s + +# CHECK: Binary Function "main" after building cfg +# CHECK: Profile Acc : 100.0% +# CHECK-NOT: BOLT-WARNING: no successor for block .LFT0 that matches index 3 or block .Ltmp0 + +#--- main.s +.globl main +.type main, @function +main: + .cfi_startproc +.LBB00: + pushq %rbp + movq %rsp, %rbp + subq $16, %rsp + testq %rax, %rax + js .LBB03 +.LBB01: + jne .LBB04 +.LBB02: + nop +.LBB03: + xorl %eax, %eax + addq $16, %rsp + popq %rbp + retq +.LBB04: + xorl %eax, %eax + addq $16, %rsp + popq %rbp + retq +## For relocations against .text +.LBB05: + call exit + .cfi_endproc + .size main, .-main + +#--- yaml +--- +header: + profile-version: 1 + binary-name: 'profile-passthrough-block.s.tmp.exe' + binary-build-id: '<unknown>' + profile-flags: [ lbr ] + profile-origin: branch profile reader + profile-events: '' + dfs-order: false + hash-func: xxh3 +functions: + - name: main2 + fid: 0 + hash: 0x72f82deaa6fe65fb + exec: 1 + nblocks: 6 + blocks: + - bid: 1 + insns: 1 + succ: [ { bid: 3, cnt: 1} ] +... >From e11820ff0121d9481a24b825f1910935e6d5789d Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Thu, 20 Jun 2024 08:28:02 -0700 Subject: [PATCH 08/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 42 +++++++++---------- .../X86/hashing-based-function-matching.test | 4 +- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 78d46eea5c728..7209168c0f81d 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -363,6 +363,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { return Profile.Hash == static_cast<uint64_t>(BF.getHash()); }; + // Computes hash for binary functions. + if (!opts::IgnoreHash) + for (auto &[_, BF] : BC.getBinaryFunctions()) + BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); + // This first pass assigns profiles that match 100% by name and by hash. for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { if (!BF) @@ -372,35 +377,29 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // the profile. Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE); - // Recompute hash once per function. - if (!opts::IgnoreHash) - Function.computeHash(YamlBP.Header.IsDFSOrder, - YamlBP.Header.HashFunction); - if (profileMatches(YamlBF, Function)) matchProfileToFunction(YamlBF, Function); } // Uses the strict hash of profiled and binary functions to match functions // that are not matched by name or common name. - std::unordered_map<size_t, BinaryFunction *> StrictBinaryFunctionHashes; - StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size()); + if (!opts::IgnoreHash) { + std::unordered_map<size_t, BinaryFunction *> StrictHashToBF; + StrictHashToBF.reserve(BC.getBinaryFunctions().size()); - for (auto &[_, BF] : BC.getBinaryFunctions()) { - if (ProfiledFunctions.count(&BF)) - continue; - BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); - StrictBinaryFunctionHashes[BF.getHash()] = &BF; - } + for (auto &[_, BF] : BC.getBinaryFunctions()) { + StrictHashToBF[BF.getHash()] = &BF; + } - for (auto YamlBF : YamlBP.Functions) { - if (YamlBF.Used) - continue; - auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash); - if (It != StrictBinaryFunctionHashes.end() && - !ProfiledFunctions.count(It->second)) { - auto *BF = It->second; - matchProfileToFunction(YamlBF, *BF); + for (auto YamlBF : YamlBP.Functions) { + if (YamlBF.Used) + continue; + auto It = StrictHashToBF.find(YamlBF.Hash); + if (It != StrictHashToBF.end() && + !ProfiledFunctions.count(It->second)) { + auto *BF = It->second; + matchProfileToFunction(YamlBF, *BF); + } } } @@ -442,7 +441,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name << '\n'; - // Set for parseFunctionProfile(). NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions"); NormalizeByCalls = usesEvent("branches"); diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test index 3c2cd834f90bf..833450047284a 100644 --- a/bolt/test/X86/hashing-based-function-matching.test +++ b/bolt/test/X86/hashing-based-function-matching.test @@ -5,7 +5,7 @@ # RUN: split-file %s %t # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib -# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=1 \ +# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ # RUN: --print-cfg 2>&1 | FileCheck %s # CHECK: Binary Function "main" after building cfg @@ -57,7 +57,7 @@ header: functions: - name: main2 fid: 0 - hash: 0x72f82deaa6fe65fb + hash: 0x72F82DEAA6FE65FB exec: 1 nblocks: 6 blocks: >From 83858e2fcc565673bf10b030199d30153ba015b3 Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Thu, 20 Jun 2024 10:45:17 -0700 Subject: [PATCH 09/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 7209168c0f81d..124af2c25b9e4 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -329,6 +329,9 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) { } bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) { + if (!opts::IgnoreHash) { + return true; + } for (StringRef Name : BF.getNames()) if (ProfileFunctionNames.contains(Name)) return true; >From 73871266cf7709ae335be22428290cdc1efe410b Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Thu, 20 Jun 2024 14:35:45 -0700 Subject: [PATCH 10/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 41 ++++++++++++++++--- bolt/lib/Rewrite/RewriteInstance.cpp | 6 ++- bolt/lib/Utils/CommandLineOpts.cpp | 5 +++ .../X86/hashing-based-function-matching.test | 6 +-- 4 files changed, 47 insertions(+), 11 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 124af2c25b9e4..603620aa483d7 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -22,6 +22,7 @@ namespace opts { extern cl::opt<unsigned> Verbosity; extern cl::OptionCategory BoltOptCategory; extern cl::opt<bool> InferStaleProfile; +extern cl::opt<bool> MatchingFunctionsWithHash; static llvm::cl::opt<bool> IgnoreHash("profile-ignore-hash", @@ -329,9 +330,6 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) { } bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) { - if (!opts::IgnoreHash) { - return true; - } for (StringRef Name : BF.getNames()) if (ProfileFunctionNames.contains(Name)) return true; @@ -366,10 +364,26 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { return Profile.Hash == static_cast<uint64_t>(BF.getHash()); }; + uint64_t MatchedWithExactName = 0; + uint64_t MatchedWithHash = 0; + uint64_t MatchedWithLTOCommonName = 0; + // Computes hash for binary functions. - if (!opts::IgnoreHash) + if (opts::MatchingFunctionsWithHash) { for (auto &[_, BF] : BC.getBinaryFunctions()) BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); + } + else { + for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { + if (!BF) + continue; + BinaryFunction &Function = *BF; + + if (!opts::IgnoreHash) + Function.computeHash(YamlBP.Header.IsDFSOrder, + YamlBP.Header.HashFunction); + } + } // This first pass assigns profiles that match 100% by name and by hash. for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { @@ -380,8 +394,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // the profile. Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE); - if (profileMatches(YamlBF, Function)) + if (profileMatches(YamlBF, Function)) { matchProfileToFunction(YamlBF, Function); + ++MatchedWithExactName; + } } // Uses the strict hash of profiled and binary functions to match functions @@ -402,6 +418,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { !ProfiledFunctions.count(It->second)) { auto *BF = It->second; matchProfileToFunction(YamlBF, *BF); + ++MatchedWithHash; } } } @@ -420,6 +437,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { for (BinaryFunction *BF : Functions) { if (!ProfiledFunctions.count(BF) && profileMatches(*YamlBF, *BF)) { matchProfileToFunction(*YamlBF, *BF); + ++MatchedWithLTOCommonName; return true; } } @@ -431,8 +449,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // partially. if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 && !LTOProfiles.front()->Used && - !ProfiledFunctions.count(*Functions.begin())) + !ProfiledFunctions.count(*Functions.begin())) { matchProfileToFunction(*LTOProfiles.front(), **Functions.begin()); + ++MatchedWithLTOCommonName; + } } for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) @@ -444,6 +464,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name << '\n'; + if (opts::Verbosity >= 2) { + outs() << "BOLT-INFO: matched " << MatchedWithExactName + << " functions with identical names\n"; + outs() << "BOLT-INFO: matched " << MatchedWithHash + << " functions with hash\n"; + outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName + << " functions with matching LTO common names\n"; + } + // Set for parseFunctionProfile(). NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions"); NormalizeByCalls = usesEvent("branches"); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 1a3a8af21d81b..c157e45e1d586 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -82,6 +82,7 @@ extern cl::opt<bool> Hugify; extern cl::opt<bool> Instrument; extern cl::opt<JumpTableSupportLevel> JumpTables; extern cl::opt<bool> KeepNops; +extern cl::opt<bool> MatchingFunctionsWithHash; extern cl::list<std::string> ReorderData; extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; extern cl::opt<bool> TerminalTrap; @@ -2982,6 +2983,9 @@ void RewriteInstance::selectFunctionsToProcess() { if (mustSkip(Function)) return false; + if (opts::MatchingFunctionsWithHash) + return true; + // If the list is not empty, only process functions from the list. if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) { // Regex check (-funcs and -funcs-file options). @@ -2998,6 +3002,7 @@ void RewriteInstance::selectFunctionsToProcess() { } if (opts::Lite) { + // Forcibly include functions specified in the -function-order file. if (opts::ReorderFunctions == ReorderFunctions::RT_USER) { for (const StringRef Name : Function.getNames()) @@ -3008,7 +3013,6 @@ void RewriteInstance::selectFunctionsToProcess() { if (ReorderFunctionsLTOCommonSet.contains(*LTOCommonName)) return true; } - if (ProfileReader && !ProfileReader->mayHaveProfileData(Function)) return false; diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 41c89bc8aeba4..58d44a98b6218 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -128,6 +128,11 @@ cl::opt<bool> cl::desc("instrument code to generate accurate profile data"), cl::cat(BoltOptCategory)); +cl::opt<bool> MatchingFunctionsWithHash("stale-matching-matching-functions-with-hash", + cl::desc("Matching functions using hash"), + cl::Hidden, + cl::cat(BoltCategory)); + cl::opt<std::string> OutputFilename("o", cl::desc("<output file>"), diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test index 833450047284a..83819f4c539d3 100644 --- a/bolt/test/X86/hashing-based-function-matching.test +++ b/bolt/test/X86/hashing-based-function-matching.test @@ -6,11 +6,9 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib # RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ -# RUN: --print-cfg 2>&1 | FileCheck %s +# RUN: --print-cfg --stale-matching-matching-functions-with-hash 2>&1 --profile-ignore-hash=0 | FileCheck %s -# CHECK: Binary Function "main" after building cfg -# CHECK: Profile Acc : 100.0% -# CHECK-NOT: BOLT-WARNING: no successor for block .LFT0 that matches index 3 or block .Ltmp0 +# CHECK: BOLT-INFO: matched 1 functions with hash #--- main.s .globl main >From baec71c1a91b3ac7be09ee9774c8542430320ad0 Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Thu, 20 Jun 2024 14:37:10 -0700 Subject: [PATCH 11/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 603620aa483d7..dcc5d23745cb2 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -406,11 +406,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { std::unordered_map<size_t, BinaryFunction *> StrictHashToBF; StrictHashToBF.reserve(BC.getBinaryFunctions().size()); - for (auto &[_, BF] : BC.getBinaryFunctions()) { + for (auto &[_, BF] : BC.getBinaryFunctions()) StrictHashToBF[BF.getHash()] = &BF; - } - for (auto YamlBF : YamlBP.Functions) { + for (yaml::bolt::BinaryFunctionProfile& YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; auto It = StrictHashToBF.find(YamlBF.Hash); >From ff68ace13b1ece9c93322a3f1833f574b493c13b Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Thu, 20 Jun 2024 14:47:04 -0700 Subject: [PATCH 12/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 2 +- bolt/lib/Rewrite/RewriteInstance.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index dcc5d23745cb2..d04d54e1a9170 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -402,7 +402,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Uses the strict hash of profiled and binary functions to match functions // that are not matched by name or common name. - if (!opts::IgnoreHash) { + if (opts::MatchingFunctionsWithHash) { std::unordered_map<size_t, BinaryFunction *> StrictHashToBF; StrictHashToBF.reserve(BC.getBinaryFunctions().size()); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index c157e45e1d586..7afc7c920318c 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -3002,7 +3002,6 @@ void RewriteInstance::selectFunctionsToProcess() { } if (opts::Lite) { - // Forcibly include functions specified in the -function-order file. if (opts::ReorderFunctions == ReorderFunctions::RT_USER) { for (const StringRef Name : Function.getNames()) @@ -3013,6 +3012,7 @@ void RewriteInstance::selectFunctionsToProcess() { if (ReorderFunctionsLTOCommonSet.contains(*LTOCommonName)) return true; } + if (ProfileReader && !ProfileReader->mayHaveProfileData(Function)) return false; >From ef2eaaa3810e26f97088981450dee5b623ca1143 Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Thu, 20 Jun 2024 14:48:11 -0700 Subject: [PATCH 13/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 14 ++++++-------- bolt/lib/Utils/CommandLineOpts.cpp | 8 ++++---- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index d04d54e1a9170..fd1a02ff63a3b 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -372,8 +372,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { if (opts::MatchingFunctionsWithHash) { for (auto &[_, BF] : BC.getBinaryFunctions()) BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); - } - else { + } else { for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { if (!BF) continue; @@ -409,12 +408,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { for (auto &[_, BF] : BC.getBinaryFunctions()) StrictHashToBF[BF.getHash()] = &BF; - for (yaml::bolt::BinaryFunctionProfile& YamlBF : YamlBP.Functions) { + for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { if (YamlBF.Used) continue; auto It = StrictHashToBF.find(YamlBF.Hash); - if (It != StrictHashToBF.end() && - !ProfiledFunctions.count(It->second)) { + if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) { auto *BF = It->second; matchProfileToFunction(YamlBF, *BF); ++MatchedWithHash; @@ -465,11 +463,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { if (opts::Verbosity >= 2) { outs() << "BOLT-INFO: matched " << MatchedWithExactName - << " functions with identical names\n"; + << " functions with identical names\n"; outs() << "BOLT-INFO: matched " << MatchedWithHash - << " functions with hash\n"; + << " functions with hash\n"; outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName - << " functions with matching LTO common names\n"; + << " functions with matching LTO common names\n"; } // Set for parseFunctionProfile(). diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 58d44a98b6218..8144af29cba4e 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -128,10 +128,10 @@ cl::opt<bool> cl::desc("instrument code to generate accurate profile data"), cl::cat(BoltOptCategory)); -cl::opt<bool> MatchingFunctionsWithHash("stale-matching-matching-functions-with-hash", - cl::desc("Matching functions using hash"), - cl::Hidden, - cl::cat(BoltCategory)); +cl::opt<bool> + MatchingFunctionsWithHash("stale-matching-matching-functions-with-hash", + cl::desc("Matching functions using hash"), + cl::Hidden, cl::cat(BoltCategory)); cl::opt<std::string> OutputFilename("o", >From 3ed600c2f22356177f46e36b88eecc7ebd6e03ff Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Thu, 20 Jun 2024 15:05:10 -0700 Subject: [PATCH 14/21] spr amend Created using spr 1.3.4 --- bolt/docs/CommandLineArgumentReference.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index 8887d1f5d5bd4..0a5af3933b453 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -798,6 +798,10 @@ bytes. Default value: 0, i.e. split iff the size is reduced. Note that on some architectures the size can increase after splitting. +- `--stale-matching-matching-functions-with-hash` + + Turns on matching functions with exact hash + - `--stale-matching-max-func-size=<uint>` The maximum size of a function to consider for inference. @@ -1161,4 +1165,4 @@ - `--print-options` - Print non-default options after command line parsing \ No newline at end of file + Print non-default options after command line parsing >From 4b13659daa0b5946e1328bfb85b6e6d32bc26c1c Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Thu, 20 Jun 2024 15:41:14 -0700 Subject: [PATCH 15/21] spr amend Created using spr 1.3.4 --- bolt/test/X86/hashing-based-function-matching.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test index 83819f4c539d3..a6f96ed79bbad 100644 --- a/bolt/test/X86/hashing-based-function-matching.test +++ b/bolt/test/X86/hashing-based-function-matching.test @@ -45,7 +45,7 @@ main: --- header: profile-version: 1 - binary-name: 'profile-passthrough-block.s.tmp.exe' + binary-name: 'hashing-based-function-matching.s.tmp.exe' binary-build-id: '<unknown>' profile-flags: [ lbr ] profile-origin: branch profile reader >From 0c18b86a6c5a19364d36b28cedf583082f94c6f7 Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Fri, 21 Jun 2024 15:18:22 -0700 Subject: [PATCH 16/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index fd1a02ff63a3b..93705ca7767b7 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -369,20 +369,12 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { uint64_t MatchedWithLTOCommonName = 0; // Computes hash for binary functions. - if (opts::MatchingFunctionsWithHash) { + if (opts::MatchingFunctionsWithHash) for (auto &[_, BF] : BC.getBinaryFunctions()) BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); - } else { - for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { - if (!BF) - continue; - BinaryFunction &Function = *BF; - - if (!opts::IgnoreHash) - Function.computeHash(YamlBP.Header.IsDFSOrder, - YamlBP.Header.HashFunction); - } - } + else if (!opts::IgnoreHash) + for (BinaryFunction *BF : ProfileBFs) + BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); // This first pass assigns profiles that match 100% by name and by hash. for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { >From eb6cb88c46c3118308f08ec59e41d4f4d0c51e39 Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Fri, 21 Jun 2024 15:30:00 -0700 Subject: [PATCH 17/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 93705ca7767b7..4a70d923a23b0 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -480,6 +480,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { } BC.setNumUnusedProfiledObjects(NumUnused); + + for (BinaryFunction* BF : BC.getAllBinaryFunctions()) + if (ProfiledFunctions.find(BF) == ProfiledFunctions.end()) + BF->setIgnored(); return Error::success(); } >From a2c9c826cea2414d17bf4096c26d60ffd814ad3b Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Fri, 21 Jun 2024 15:32:54 -0700 Subject: [PATCH 18/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 4a70d923a23b0..2e0180818a9e3 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -23,6 +23,7 @@ extern cl::opt<unsigned> Verbosity; extern cl::OptionCategory BoltOptCategory; extern cl::opt<bool> InferStaleProfile; extern cl::opt<bool> MatchingFunctionsWithHash; +extern cl::opt<bool> Lite; static llvm::cl::opt<bool> IgnoreHash("profile-ignore-hash", @@ -481,9 +482,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { BC.setNumUnusedProfiledObjects(NumUnused); - for (BinaryFunction* BF : BC.getAllBinaryFunctions()) - if (ProfiledFunctions.find(BF) == ProfiledFunctions.end()) - BF->setIgnored(); + if (opts::Lite) + for (BinaryFunction* BF : BC.getAllBinaryFunctions()) + if (ProfiledFunctions.find(BF) == ProfiledFunctions.end()) + BF->setIgnored(); + return Error::success(); } >From 4e1b758a7c5cf99f8509d708bcdea51f83e0090f Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Fri, 21 Jun 2024 15:40:29 -0700 Subject: [PATCH 19/21] spr amend Created using spr 1.3.4 --- bolt/docs/CommandLineArgumentReference.md | 2 +- bolt/lib/Profile/YAMLProfileReader.cpp | 13 ++++++------- bolt/lib/Rewrite/RewriteInstance.cpp | 4 ++-- bolt/lib/Utils/CommandLineOpts.cpp | 6 +++--- bolt/test/X86/hashing-based-function-matching.test | 2 +- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index 0a5af3933b453..994651d76342d 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -798,7 +798,7 @@ bytes. Default value: 0, i.e. split iff the size is reduced. Note that on some architectures the size can increase after splitting. -- `--stale-matching-matching-functions-with-hash` +- `--match-profile-with-function-hash` Turns on matching functions with exact hash diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 2e0180818a9e3..94aaac532d5ca 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -22,7 +22,7 @@ namespace opts { extern cl::opt<unsigned> Verbosity; extern cl::OptionCategory BoltOptCategory; extern cl::opt<bool> InferStaleProfile; -extern cl::opt<bool> MatchingFunctionsWithHash; +extern cl::opt<bool> MatchProfileWithFunctionHash; extern cl::opt<bool> Lite; static llvm::cl::opt<bool> @@ -370,7 +370,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { uint64_t MatchedWithLTOCommonName = 0; // Computes hash for binary functions. - if (opts::MatchingFunctionsWithHash) + if (opts::MatchProfileWithFunctionHash) for (auto &[_, BF] : BC.getBinaryFunctions()) BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); else if (!opts::IgnoreHash) @@ -394,7 +394,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Uses the strict hash of profiled and binary functions to match functions // that are not matched by name or common name. - if (opts::MatchingFunctionsWithHash) { + if (opts::MatchProfileWithFunctionHash) { std::unordered_map<size_t, BinaryFunction *> StrictHashToBF; StrictHashToBF.reserve(BC.getBinaryFunctions().size()); @@ -481,12 +481,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { } BC.setNumUnusedProfiledObjects(NumUnused); - - if (opts::Lite) - for (BinaryFunction* BF : BC.getAllBinaryFunctions()) + + if (opts::Lite) + for (BinaryFunction *BF : BC.getAllBinaryFunctions()) if (ProfiledFunctions.find(BF) == ProfiledFunctions.end()) BF->setIgnored(); - return Error::success(); } diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 7afc7c920318c..ec234add8a6f5 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -82,7 +82,7 @@ extern cl::opt<bool> Hugify; extern cl::opt<bool> Instrument; extern cl::opt<JumpTableSupportLevel> JumpTables; extern cl::opt<bool> KeepNops; -extern cl::opt<bool> MatchingFunctionsWithHash; +extern cl::opt<bool> MatchProfileWithFunctionHash; extern cl::list<std::string> ReorderData; extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; extern cl::opt<bool> TerminalTrap; @@ -2983,7 +2983,7 @@ void RewriteInstance::selectFunctionsToProcess() { if (mustSkip(Function)) return false; - if (opts::MatchingFunctionsWithHash) + if (opts::MatchProfileWithFunctionHash) return true; // If the list is not empty, only process functions from the list. diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 8144af29cba4e..0724e627dfd19 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -129,9 +129,9 @@ cl::opt<bool> cl::cat(BoltOptCategory)); cl::opt<bool> - MatchingFunctionsWithHash("stale-matching-matching-functions-with-hash", - cl::desc("Matching functions using hash"), - cl::Hidden, cl::cat(BoltCategory)); + MatchProfileWithFunctionHash("match-profile-with-function-hash", + cl::desc("Match profile with function hash"), + cl::Hidden, cl::cat(BoltCategory)); cl::opt<std::string> OutputFilename("o", diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test index a6f96ed79bbad..60980b639b7e8 100644 --- a/bolt/test/X86/hashing-based-function-matching.test +++ b/bolt/test/X86/hashing-based-function-matching.test @@ -6,7 +6,7 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib # RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ -# RUN: --print-cfg --stale-matching-matching-functions-with-hash 2>&1 --profile-ignore-hash=0 | FileCheck %s +# RUN: --print-cfg --match-profile-with-function-hash 2>&1 --profile-ignore-hash=0 | FileCheck %s # CHECK: BOLT-INFO: matched 1 functions with hash >From f2ba96ab823bafcb45416a3f78900a4ead4b67b7 Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Sat, 22 Jun 2024 22:09:07 -0700 Subject: [PATCH 20/21] spr amend Created using spr 1.3.4 --- bolt/lib/Profile/YAMLProfileReader.cpp | 4 ++-- bolt/test/X86/hashing-based-function-matching.test | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 94aaac532d5ca..6c4eece4ddb1b 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -406,7 +406,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { continue; auto It = StrictHashToBF.find(YamlBF.Hash); if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) { - auto *BF = It->second; + BinaryFunction *BF = It->second; matchProfileToFunction(YamlBF, *BF); ++MatchedWithHash; } @@ -484,7 +484,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { if (opts::Lite) for (BinaryFunction *BF : BC.getAllBinaryFunctions()) - if (ProfiledFunctions.find(BF) == ProfiledFunctions.end()) + if (!BF->hasProfile()) BF->setIgnored(); return Error::success(); diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test index 60980b639b7e8..4426da085bbd9 100644 --- a/bolt/test/X86/hashing-based-function-matching.test +++ b/bolt/test/X86/hashing-based-function-matching.test @@ -1,5 +1,4 @@ -## Test YAMLProfileReader support for pass-through blocks in non-matching edges: -## match the profile edge A -> C to the CFG with blocks A -> B -> C. +## Tests function matching in YAMLProfileReader by function hash. # REQUIRES: system-linux # RUN: split-file %s %t >From 6a5618851186cb88180e92ad0d9efe3aa62bfe43 Mon Sep 17 00:00:00 2001 From: shawbyoung <shawbyo...@gmail.com> Date: Mon, 24 Jun 2024 13:16:58 -0700 Subject: [PATCH 21/21] Drop end of line Created using spr 1.3.4 --- bolt/docs/CommandLineArgumentReference.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md index 994651d76342d..7a77e8eda18b2 100644 --- a/bolt/docs/CommandLineArgumentReference.md +++ b/bolt/docs/CommandLineArgumentReference.md @@ -798,14 +798,15 @@ bytes. Default value: 0, i.e. split iff the size is reduced. Note that on some architectures the size can increase after splitting. -- `--match-profile-with-function-hash` - - Turns on matching functions with exact hash - - `--stale-matching-max-func-size=<uint>` The maximum size of a function to consider for inference. +- `--stale-matching-min-matched-block=<uint>` + + Percentage threshold of matched basic blocks at which stale profile inference + is executed. + - `--stale-threshold=<uint>` Maximum percentage of stale functions to tolerate (default: 100) @@ -1165,4 +1166,4 @@ - `--print-options` - Print non-default options after command line parsing + Print non-default options after command line parsing \ No newline at end of file _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits