https://github.com/kyulee-com created https://github.com/llvm/llvm-project/pull/112664
This introduces a new cgdata format for stable function maps. The raw data is embedded in the __llvm_merge section during compile time. This data can be read and merged using the llvm-cgdata tool, into an indexed cgdata file. Consequently, the tool is now capable of handling either outlined hash trees, stable function maps, or both, as they are orthogonal. >From af5931f2a7aa020afed0ad474b6e6a7e4c564703 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee <kyu...@meta.com> Date: Mon, 9 Sep 2024 19:38:05 -0700 Subject: [PATCH] [CGData][llvm-cgdata] Support for stable function map This introduces a new cgdata format for stable function maps. The raw data is embedded in the __llvm_merge section during compile time. This data can be read and merged using the llvm-cgdata tool, into an indexed cgdata file. Consequently, the tool is now capable of handling either outlined hash trees, stable function maps, or both, as they are orthogonal. --- llvm/docs/CommandGuide/llvm-cgdata.rst | 16 ++-- llvm/include/llvm/CGData/CodeGenData.h | 24 +++++- llvm/include/llvm/CGData/CodeGenData.inc | 12 ++- llvm/include/llvm/CGData/CodeGenDataReader.h | 29 ++++++- llvm/include/llvm/CGData/CodeGenDataWriter.h | 17 +++- llvm/lib/CGData/CodeGenData.cpp | 30 ++++--- llvm/lib/CGData/CodeGenDataReader.cpp | 63 +++++++++----- llvm/lib/CGData/CodeGenDataWriter.cpp | 30 ++++++- llvm/test/tools/llvm-cgdata/empty.test | 8 +- llvm/test/tools/llvm-cgdata/error.test | 13 +-- .../merge-combined-funcmap-hashtree.test | 66 +++++++++++++++ .../llvm-cgdata/merge-funcmap-archive.test | 83 +++++++++++++++++++ .../llvm-cgdata/merge-funcmap-concat.test | 78 +++++++++++++++++ .../llvm-cgdata/merge-funcmap-double.test | 79 ++++++++++++++++++ .../llvm-cgdata/merge-funcmap-single.test | 36 ++++++++ ...chive.test => merge-hashtree-archive.test} | 8 +- ...concat.test => merge-hashtree-concat.test} | 6 +- ...double.test => merge-hashtree-double.test} | 8 +- ...single.test => merge-hashtree-single.test} | 4 +- 
llvm/tools/llvm-cgdata/llvm-cgdata.cpp | 46 +++++++--- 20 files changed, 572 insertions(+), 84 deletions(-) create mode 100644 llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-double.test create mode 100644 llvm/test/tools/llvm-cgdata/merge-funcmap-single.test rename llvm/test/tools/llvm-cgdata/{merge-archive.test => merge-hashtree-archive.test} (91%) rename llvm/test/tools/llvm-cgdata/{merge-concat.test => merge-hashtree-concat.test} (93%) rename llvm/test/tools/llvm-cgdata/{merge-double.test => merge-hashtree-double.test} (90%) rename llvm/test/tools/llvm-cgdata/{merge-single.test => merge-hashtree-single.test} (92%) diff --git a/llvm/docs/CommandGuide/llvm-cgdata.rst b/llvm/docs/CommandGuide/llvm-cgdata.rst index f592e1508844ee..0670decd087e39 100644 --- a/llvm/docs/CommandGuide/llvm-cgdata.rst +++ b/llvm/docs/CommandGuide/llvm-cgdata.rst @@ -11,15 +11,13 @@ SYNOPSIS DESCRIPTION ----------- -The :program:llvm-cgdata utility parses raw codegen data embedded -in compiled binary files and merges them into a single .cgdata file. -It can also inspect and manipulate .cgdata files. -Currently, the tool supports saving and restoring outlined hash trees, -enabling global function outlining across modules, allowing for more -efficient function outlining in subsequent compilations. -The design is extensible, allowing for the incorporation of additional -codegen summaries and optimization techniques, such as global function -merging, in the future. +The :program:llvm-cgdata utility parses raw codegen data embedded in compiled +binary files and merges them into a single .cgdata file. It can also inspect +and manipulate .cgdata files. 
Currently, the tool supports saving and restoring +outlined hash trees and stable function maps, allowing for more efficient +function outlining and function merging across modules in subsequent +compilations. The design is extensible, allowing for the incorporation of +additional codegen summaries and optimization techniques. COMMANDS -------- diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 53550beeae1f83..5d7c74725ccef1 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -19,6 +19,7 @@ #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/CGData/OutlinedHashTree.h" #include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/CGData/StableFunctionMapRecord.h" #include "llvm/IR/Module.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Caching.h" @@ -41,7 +42,9 @@ enum class CGDataKind { Unknown = 0x0, // A function outlining info. FunctionOutlinedHashTree = 0x1, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree) + // A function merging info. + StableFunctionMergingMap = 0x2, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap) }; const std::error_category &cgdata_category(); @@ -108,6 +111,8 @@ enum CGDataMode { class CodeGenData { /// Global outlined hash tree that has oulined hash sequences across modules. std::unique_ptr<OutlinedHashTree> PublishedHashTree; + /// Global stable function map that has stable function info across modules. + std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap; /// This flag is set when -fcodegen-data-generate is passed. /// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds. @@ -131,6 +136,9 @@ class CodeGenData { bool hasOutlinedHashTree() { return PublishedHashTree && !PublishedHashTree->empty(); } + bool hasStableFunctionMap() { + return PublishedStableFunctionMap && !PublishedStableFunctionMap->empty(); + } /// Returns the outlined hash tree. 
This can be globally used in a read-only /// manner. @@ -147,6 +155,12 @@ class CodeGenData { // Ensure we disable emitCGData as we do not want to read and write both. EmitCGData = false; } + void + publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) { + PublishedStableFunctionMap = std::move(FunctionMap); + // Ensure we disable emitCGData as we do not want to read and write both. + EmitCGData = false; + } }; namespace cgdata { @@ -166,6 +180,11 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +inline void +publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) { + CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap)); +} + struct StreamCacheData { /// Backing buffer for serialized data stream. SmallVector<SmallString<0>> Outputs; @@ -249,6 +268,8 @@ enum CGDataVersion { // Version 1 is the first version. This version supports the outlined // hash tree. Version1 = 1, + // Version 2 supports the stable function merging map. + Version2 = 2, CurrentVersion = CG_DATA_INDEX_VERSION }; const uint64_t Version = CGDataVersion::CurrentVersion; @@ -258,6 +279,7 @@ struct Header { uint32_t Version; uint32_t DataKind; uint64_t OutlinedHashTreeOffset; + uint64_t StableFunctionMapOffset; // New fields should only be added at the end to ensure that the size // computation is correct. 
The methods below need to be updated to ensure that diff --git a/llvm/include/llvm/CGData/CodeGenData.inc b/llvm/include/llvm/CGData/CodeGenData.inc index 08ec14ea051a0c..e0ae7a51024d87 100644 --- a/llvm/include/llvm/CGData/CodeGenData.inc +++ b/llvm/include/llvm/CGData/CodeGenData.inc @@ -20,6 +20,8 @@ #define CG_DATA_DEFINED CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON), CG_DATA_OUTLINE_COFF, "__DATA,") +CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON), + CG_DATA_MERGE_COFF, "__DATA,") #undef CG_DATA_SECT_ENTRY #endif @@ -27,20 +29,24 @@ CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON), /* section name strings common to all targets other than WIN32 */ #define CG_DATA_OUTLINE_COMMON __llvm_outline +#define CG_DATA_MERGE_COMMON __llvm_merge /* Since cg data sections are not allocated, we don't need to * access them at runtime. */ #define CG_DATA_OUTLINE_COFF ".loutline" +#define CG_DATA_MERGE_COFF ".lmerge" #ifdef _WIN32 /* Runtime section names and name strings. */ -#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF +#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_OUTLINE_COFF +#define CG_DATA_MERGE_SECT_NAME CG_DATA_MERGE_COFF #else /* Runtime section names and name strings. */ -#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON) +#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON) +#define CG_DATA_MERGE_SECT_NAME CG_DATA_QUOTE(CG_DATA_MERGE_COMMON) #endif /* Indexed codegen data format version (start from 1). 
*/ -#define CG_DATA_INDEX_VERSION 1 +#define CG_DATA_INDEX_VERSION 2 diff --git a/llvm/include/llvm/CGData/CodeGenDataReader.h b/llvm/include/llvm/CGData/CodeGenDataReader.h index 7e4882df2116e2..085dd6dd747c90 100644 --- a/llvm/include/llvm/CGData/CodeGenDataReader.h +++ b/llvm/include/llvm/CGData/CodeGenDataReader.h @@ -15,6 +15,7 @@ #include "llvm/CGData/CodeGenData.h" #include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/CGData/StableFunctionMapRecord.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/VirtualFileSystem.h" @@ -36,10 +37,15 @@ class CodeGenDataReader { virtual CGDataKind getDataKind() const = 0; /// Return true if the data has an outlined hash tree. virtual bool hasOutlinedHashTree() const = 0; + /// Return true if the data has a stable function map. + virtual bool hasStableFunctionMap() const = 0; /// Return the outlined hash tree that is released from the reader. std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() { return std::move(HashTreeRecord.HashTree); } + std::unique_ptr<StableFunctionMap> releaseStableFunctionMap() { + return std::move(FunctionMapRecord.FunctionMap); + } /// Factory method to create an appropriately typed reader for the given /// codegen data file path and file system. @@ -56,15 +62,21 @@ class CodeGenDataReader { /// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds. /// Optionally, \p CombinedHash can be used to compuate the combined hash of /// the merged data. - static Error mergeFromObjectFile(const object::ObjectFile *Obj, - OutlinedHashTreeRecord &GlobalOutlineRecord, - stable_hash *CombinedHash = nullptr); + static Error + mergeFromObjectFile(const object::ObjectFile *Obj, + OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord &GlobalFunctionMapRecord, + stable_hash *CombinedHash = nullptr); protected: /// The outlined hash tree that has been read. When it's released by /// releaseOutlinedHashTree(), it's no longer valid. 
OutlinedHashTreeRecord HashTreeRecord; + /// The stable function map that has been read. When it's released by + /// releaseStableFunctionMap(), it's no longer valid. + StableFunctionMapRecord FunctionMapRecord; + /// Set the current error and return same. Error error(cgdata_error Err, const std::string &ErrMsg = "") { LastError = Err; @@ -115,6 +127,11 @@ class IndexedCodeGenDataReader : public CodeGenDataReader { return Header.DataKind & static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); } + /// Return true if the header indicates the data has a stable function map. + bool hasStableFunctionMap() const override { + return Header.DataKind & + static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap); + } }; /// This format is a simple text format that's suitable for test data. @@ -150,6 +167,12 @@ class TextCodeGenDataReader : public CodeGenDataReader { return static_cast<uint32_t>(DataKind) & static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); } + /// Return true if the header indicates the data has a stable function map. + /// This does not mean that the data is still available. + bool hasStableFunctionMap() const override { + return static_cast<uint32_t>(DataKind) & + static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap); + } }; } // end namespace llvm diff --git a/llvm/include/llvm/CGData/CodeGenDataWriter.h b/llvm/include/llvm/CGData/CodeGenDataWriter.h index 5cb8377b1d07e5..1c4247608999a7 100644 --- a/llvm/include/llvm/CGData/CodeGenDataWriter.h +++ b/llvm/include/llvm/CGData/CodeGenDataWriter.h @@ -15,6 +15,7 @@ #include "llvm/CGData/CodeGenData.h" #include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/CGData/StableFunctionMapRecord.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" @@ -57,6 +58,9 @@ class CodeGenDataWriter { /// The outlined hash tree to be written. OutlinedHashTreeRecord HashTreeRecord; + /// The stable function map to be written.
+ StableFunctionMapRecord FunctionMapRecord; + /// A bit mask describing the kind of the codegen data. CGDataKind DataKind = CGDataKind::Unknown; @@ -64,9 +68,12 @@ class CodeGenDataWriter { CodeGenDataWriter() = default; ~CodeGenDataWriter() = default; - /// Add the outlined hash tree record. The input Record is released. + /// Add the outlined hash tree record. The input hash tree is released. void addRecord(OutlinedHashTreeRecord &Record); + /// Add the stable function map record. The input function map is released. + void addRecord(StableFunctionMapRecord &Record); + /// Write the codegen data to \c OS Error write(raw_fd_ostream &OS); @@ -81,11 +88,19 @@ class CodeGenDataWriter { return static_cast<uint32_t>(DataKind) & static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); } + /// Return true if the header indicates the data has a stable function map. + bool hasStableFunctionMap() const { + return static_cast<uint32_t>(DataKind) & + static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap); + } private: /// The offset of the outlined hash tree in the file. uint64_t OutlinedHashTreeOffset; + /// The offset of the stable function map in the file. 
+ uint64_t StableFunctionMapOffset; + /// Write the codegen data header to \c COS Error writeHeader(CGDataOStream &COS); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index c56a8b77a52319..a8eddc202dd0aa 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -14,6 +14,7 @@ #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/CGData/CodeGenDataReader.h" #include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/CGData/StableFunctionMapRecord.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Caching.h" #include "llvm/Support/CommandLine.h" @@ -163,6 +164,8 @@ CodeGenData &CodeGenData::getInstance() { auto Reader = ReaderOrErr->get(); if (Reader->hasOutlinedHashTree()) Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree()); + if (Reader->hasStableFunctionMap()) + Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap()); } }); return *(Instance.get()); @@ -185,18 +188,14 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) { return make_error<CGDataError>(cgdata_error::unsupported_version); H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr); - switch (H.Version) { - // When a new field is added to the header add a case statement here to - // compute the size as offset of the new field + size of the new field. This - // relies on the field being added to the end of the list. 
- static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1, - "Please update the size computation below if a new field has " - "been added to the header, if not add a case statement to " - "fall through to the latest version."); - case 1ull: - H.OutlinedHashTreeOffset = + static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2, + "Please update the offset computation below if a new field has " + "been added to the header."); + H.OutlinedHashTreeOffset = + endian::readNext<uint64_t, endianness::little, unaligned>(Curr); + if (H.Version >= 2) + H.StableFunctionMapOffset = endian::readNext<uint64_t, endianness::little, unaligned>(Curr); - } return H; } @@ -257,6 +256,7 @@ std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule, Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) { OutlinedHashTreeRecord GlobalOutlineRecord; + StableFunctionMapRecord GlobalStableFunctionMapRecord; stable_hash CombinedHash = 0; for (auto File : ObjFiles) { if (File.empty()) @@ -270,12 +270,18 @@ Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) { std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get(); if (auto E = CodeGenDataReader::mergeFromObjectFile( - Obj.get(), GlobalOutlineRecord, &CombinedHash)) + Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord, + &CombinedHash)) return E; } + GlobalStableFunctionMapRecord.finalize(); + if (!GlobalOutlineRecord.empty()) cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree)); + if (!GlobalStableFunctionMapRecord.empty()) + cgdata::publishStableFunctionMap( + std::move(GlobalStableFunctionMapRecord.FunctionMap)); return CombinedHash; } diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp index 2f2481ea60f822..ebeb4ae36f99f3 100644 --- a/llvm/lib/CGData/CodeGenDataReader.cpp +++ b/llvm/lib/CGData/CodeGenDataReader.cpp @@ -32,10 +32,40 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem 
&FS) { Error CodeGenDataReader::mergeFromObjectFile( const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord &GlobalFunctionMapRecord, stable_hash *CombinedHash) { Triple TT = Obj->makeTriple(); auto CGOutLineName = getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false); + auto CGMergeName = + getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false); + + auto processSectionContents = [&](const StringRef &Name, + const StringRef &Contents) { + if (Name != CGOutLineName && Name != CGMergeName) + return; + if (CombinedHash) + *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents)); + auto *Data = reinterpret_cast<const unsigned char *>(Contents.data()); + auto *EndData = Data + Contents.size(); + // In case dealing with an executable that has concatenated cgdata, + // we want to merge them into a single cgdata. + // Although it's not a typical workflow, we support this scenario + // by looping over all data in the sections. 
+ if (Name == CGOutLineName) { + while (Data != EndData) { + OutlinedHashTreeRecord LocalOutlineRecord; + LocalOutlineRecord.deserialize(Data); + GlobalOutlineRecord.merge(LocalOutlineRecord); + } + } else if (Name == CGMergeName) { + while (Data != EndData) { + StableFunctionMapRecord LocalFunctionMapRecord; + LocalFunctionMapRecord.deserialize(Data); + GlobalFunctionMapRecord.merge(LocalFunctionMapRecord); + } + } + }; for (auto &Section : Obj->sections()) { Expected<StringRef> NameOrErr = Section.getName(); @@ -44,23 +74,7 @@ Error CodeGenDataReader::mergeFromObjectFile( Expected<StringRef> ContentsOrErr = Section.getContents(); if (!ContentsOrErr) return ContentsOrErr.takeError(); - auto *Data = reinterpret_cast<const unsigned char *>(ContentsOrErr->data()); - auto *EndData = Data + ContentsOrErr->size(); - - if (*NameOrErr == CGOutLineName) { - if (CombinedHash) - *CombinedHash = - stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr)); - // In case dealing with an executable that has concatenated cgdata, - // we want to merge them into a single cgdata. - // Although it's not a typical workflow, we support this scenario. - while (Data != EndData) { - OutlinedHashTreeRecord LocalOutlineRecord; - LocalOutlineRecord.deserialize(Data); - GlobalOutlineRecord.merge(LocalOutlineRecord); - } - } - // TODO: Add support for other cgdata sections. + processSectionContents(*NameOrErr, *ContentsOrErr); } return Error::success(); @@ -69,7 +83,8 @@ Error CodeGenDataReader::mergeFromObjectFile( Error IndexedCodeGenDataReader::read() { using namespace support; - // The smallest header with the version 1 is 24 bytes + // The smallest header with the version 1 is 24 bytes. + // Do not update this value even with the new version of the header. 
const unsigned MinHeaderSize = 24; if (DataBuffer->getBufferSize() < MinHeaderSize) return error(cgdata_error::bad_header); @@ -87,6 +102,12 @@ Error IndexedCodeGenDataReader::read() { return error(cgdata_error::eof); HashTreeRecord.deserialize(Ptr); } + if (hasStableFunctionMap()) { + const unsigned char *Ptr = Start + Header.StableFunctionMapOffset; + if (Ptr >= End) + return error(cgdata_error::eof); + FunctionMapRecord.deserialize(Ptr); + } return success(); } @@ -152,6 +173,8 @@ Error TextCodeGenDataReader::read() { StringRef Str = Line->drop_front().rtrim(); if (Str.equals_insensitive("outlined_hash_tree")) DataKind |= CGDataKind::FunctionOutlinedHashTree; + else if (Str.equals_insensitive("stable_function_map")) + DataKind |= CGDataKind::StableFunctionMergingMap; else return error(cgdata_error::bad_header); } @@ -170,8 +193,8 @@ Error TextCodeGenDataReader::read() { yaml::Input YOS(StringRef(Pos, Size)); if (hasOutlinedHashTree()) HashTreeRecord.deserializeYAML(YOS); - - // TODO: Add more yaml cgdata in order + if (hasStableFunctionMap()) + FunctionMapRecord.deserializeYAML(YOS); return Error::success(); } diff --git a/llvm/lib/CGData/CodeGenDataWriter.cpp b/llvm/lib/CGData/CodeGenDataWriter.cpp index 5f638be0fefe74..54ef44b9269c1e 100644 --- a/llvm/lib/CGData/CodeGenDataWriter.cpp +++ b/llvm/lib/CGData/CodeGenDataWriter.cpp @@ -52,6 +52,13 @@ void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) { DataKind |= CGDataKind::FunctionOutlinedHashTree; } +void CodeGenDataWriter::addRecord(StableFunctionMapRecord &Record) { + assert(Record.StableHashTree && "empty function map in the record"); + FunctionMapRecord.FunctionMap = std::move(Record.FunctionMap); + + DataKind |= CGDataKind::StableFunctionMergingMap; +} + Error CodeGenDataWriter::write(raw_fd_ostream &OS) { CGDataOStream COS(OS); return writeImpl(COS); @@ -68,8 +75,11 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) { if (static_cast<bool>(DataKind & 
CGDataKind::FunctionOutlinedHashTree)) Header.DataKind |= static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); - + if (static_cast<bool>(DataKind & CGDataKind::StableFunctionMergingMap)) + Header.DataKind |= + static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap); Header.OutlinedHashTreeOffset = 0; + Header.StableFunctionMapOffset = 0; // Only write up to the CGDataKind. We need to remember the offset of the // remaining fields to allow back-patching later. @@ -83,6 +93,12 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) { // Reserve the space for OutlinedHashTreeOffset field. COS.write(0); + // Save the location of Header.StableFunctionMapOffset field in \c COS. + StableFunctionMapOffset = COS.tell(); + + // Reserve the space for StableFunctionMapOffset field. + COS.write(0); + return Error::success(); } @@ -93,10 +109,14 @@ Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) { uint64_t OutlinedHashTreeFieldStart = COS.tell(); if (hasOutlinedHashTree()) HashTreeRecord.serialize(COS.OS); + uint64_t StableFunctionMapFieldStart = COS.tell(); + if (hasStableFunctionMap()) + FunctionMapRecord.serialize(COS.OS); // Back patch the offsets. 
CGDataPatchItem PatchItems[] = { - {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}}; + {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}, + {StableFunctionMapOffset, &StableFunctionMapFieldStart, 1}}; COS.patch(PatchItems); return Error::success(); @@ -106,6 +126,9 @@ Error CodeGenDataWriter::writeHeaderText(raw_fd_ostream &OS) { if (hasOutlinedHashTree()) OS << "# Outlined stable hash tree\n:outlined_hash_tree\n"; + if (hasStableFunctionMap()) + OS << "# Stable function map\n:stable_function_map\n"; + // TODO: Add more data types in this header return Error::success(); @@ -119,6 +142,9 @@ Error CodeGenDataWriter::writeText(raw_fd_ostream &OS) { if (hasOutlinedHashTree()) HashTreeRecord.serializeYAML(YOS); + if (hasStableFunctionMap()) + FunctionMapRecord.serializeYAML(YOS); + // TODO: Write more yaml cgdata in order return Error::success(); diff --git a/llvm/test/tools/llvm-cgdata/empty.test b/llvm/test/tools/llvm-cgdata/empty.test index 70d5ea4b800630..bea78d512a6db7 100644 --- a/llvm/test/tools/llvm-cgdata/empty.test +++ b/llvm/test/tools/llvm-cgdata/empty.test @@ -16,7 +16,7 @@ RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0 # The version number appears when asked, as it's in the header RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION -VERSION: Version: 1 +VERSION: Version: 2 # When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header. 
RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0 @@ -27,9 +27,11 @@ RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0 # uint32_t Version; # uint32_t DataKind; # uint64_t OutlinedHashTreeOffset; +# uint64_t StableFunctionMapOffset; # } RUN: printf '\xffcgdata\x81' > %t_header.cgdata -RUN: printf '\x01\x00\x00\x00' >> %t_header.cgdata +RUN: printf '\x02\x00\x00\x00' >> %t_header.cgdata RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata -RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata RUN: diff %t_header.cgdata %t_emptyheader.cgdata diff --git a/llvm/test/tools/llvm-cgdata/error.test b/llvm/test/tools/llvm-cgdata/error.test index c992174505c1ad..2caa3aef403950 100644 --- a/llvm/test/tools/llvm-cgdata/error.test +++ b/llvm/test/tools/llvm-cgdata/error.test @@ -6,6 +6,7 @@ # uint32_t Version; # uint32_t DataKind; # uint64_t OutlinedHashTreeOffset; +# uint64_t StableFunctionMapOffset; # } RUN: touch %t_empty.cgdata RUN: not llvm-cgdata --show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix=EMPTY @@ -21,18 +22,20 @@ RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix=CORRUPT CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt) -# The current version 1 while the header says 2. +# The current version is 2 while the header says 3.
RUN: printf '\xffcgdata\x81' > %t_version.cgdata -RUN: printf '\x02\x00\x00\x00' >> %t_version.cgdata +RUN: printf '\x03\x00\x00\x00' >> %t_version.cgdata RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata -RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata RUN: not llvm-cgdata --show %t_version.cgdata 2>&1 | FileCheck %s --check-prefix=BAD_VERSION BAD_VERSION: {{.}}cgdata: unsupported codegen data version # Header says an outlined hash tree, but the file ends after the header. RUN: printf '\xffcgdata\x81' > %t_eof.cgdata +RUN: printf '\x02\x00\x00\x00' >> %t_eof.cgdata RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata -RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata -RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata RUN: not llvm-cgdata --show %t_eof.cgdata 2>&1 | FileCheck %s --check-prefix=EOF EOF: {{.}}cgdata: end of File diff --git a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test new file mode 100644 index 00000000000000..b9bf067d3771c5 --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test @@ -0,0 +1,66 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Test merging a single object file having both __llvm_outline and __llvm_merge into a cgdata. +# Effectively, this test combines merge-hashtree.test and merge-funcmap.test. + +RUN: split-file %s %t + +# Synthesize raw hashtree bytes without the header (32 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-hashtree.cgtext -o %t/raw-hashtree.cgdata +RUN: od -t x1 -j 32 -An %t/raw-hashtree.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-hashtree-bytes.txt + +# Synthesize raw funcmap bytes without the header (32 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-funcmap.cgtext -o %t/raw-funcmap.cgdata +RUN: od -t x1 -j 32 -An %t/raw-funcmap.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-funcmap-bytes.txt + +# Synthesize a bitcode file by creating two sections for the hash tree and the function map, respectively. +RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-hashtree-bytes.txt)/g" %t/merge-both-template.ll > %t/merge-both-hashtree-template.ll +RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-funcmap-bytes.txt)/g" %t/merge-both-hashtree-template.ll > %t/merge-both-hashtree-funcmap.ll + +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-both-hashtree-funcmap.ll -o %t/merge-both-hashtree-funcmap.o + +# Merge an object file having cgdata (__llvm_outline and __llvm_merge) +RUN: llvm-cgdata -m %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata +RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap.cgdata | FileCheck %s + +CHECK: Outlined hash tree: +CHECK-NEXT: Total Node Count: 3 +CHECK-NEXT: Terminal Node Count: 1 +CHECK-NEXT: Depth: 2 +CHECK-NEXT: Stable function map: +CHECK-NEXT: Unique hash Count: 1 +CHECK-NEXT: Total function Count: 1 +CHECK-NEXT: Mergeable function Count: 0 + +;--- raw-hashtree.cgtext +:outlined_hash_tree +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x2 + Terminals: 4 + SuccessorIds: [ ] +... + +;--- raw-funcmap.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func1 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 3 +... 
+ +;--- merge-both-template.ll +@.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline" +@.data2 = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge" diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test new file mode 100644 index 00000000000000..f643c8d92073e3 --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test @@ -0,0 +1,83 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Merge an archive that has two object files having cgdata (__llvm_merge) + +RUN: split-file %s %t + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o + +# Make an archive from two object files +RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o + +# Merge the archive into the codegen data file. 
+RUN: llvm-cgdata --merge %t/merge-archive.a -o %t/merge-archive.cgdata +RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s + +RUN: llvm-cgdata --show %t/merge-archive.cgdata| FileCheck %s +CHECK: Stable function map: +CHECK-NEXT: Unique hash Count: 1 +CHECK-NEXT: Total function Count: 2 +CHECK-NEXT: Mergeable function Count: 2 + +RUN: llvm-cgdata --convert %t/merge-archive.cgdata| FileCheck %s --check-prefix=MAP +MAP: # Stable function map +MAP-NEXT: :stable_function_map +MAP-NEXT: --- +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func1 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 3 +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func2 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 4 +MAP-NEXT: ... + +;--- raw-1.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func2 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 4 +... + +;--- merge-1-template.ll +@.data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge" + +;--- raw-2.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func1 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 3 +... 
+ +;--- merge-2-template.ll +@.data = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge" diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test new file mode 100644 index 00000000000000..c8acf1f3916e5a --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test @@ -0,0 +1,78 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Merge a binary file (e.g., a linked executable) having concatenated cgdata (__llvm_merge) + +RUN: split-file %s %t + +# Synthesize two sets of raw cgdata without the header (32 byte) from the indexed cgdata. +# Concatenate them in merge-concat.ll +RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat-template.ll > %t/merge-concat-template-2.ll +RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-2.ll > %t/merge-concat.ll + +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o +RUN: llvm-cgdata --merge %t/merge-concat.o -o %t/merge-concat.cgdata +RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s + +CHECK: Stable function map: +CHECK-NEXT: Unique hash Count: 1 +CHECK-NEXT: Total function Count: 2 +CHECK-NEXT: Mergeable function Count: 2 + +RUN: llvm-cgdata --convert %t/merge-concat.cgdata| FileCheck %s --check-prefix=MAP +MAP: # Stable function map +MAP-NEXT: :stable_function_map +MAP-NEXT: --- +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func1 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: 
IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 3 +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func2 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 4 +MAP-NEXT: ... + +;--- raw-1.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func2 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 4 +... + +;--- raw-2.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func1 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 3 +... + +;--- merge-concat-template.ll + +; In a linked executable (as opposed to an object file), cgdata in __llvm_merge might be concatenated. +; Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated. +; In other words, the following two stable function maps are encoded back-to-back in a binary format. +@.data1 = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge" +@.data2 = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge" diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test new file mode 100644 index 00000000000000..3ae67f062f820f --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test @@ -0,0 +1,79 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Merge two object files having cgdata (__llvm_merge) + +RUN: split-file %s %t + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. 
+RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o + +# Merge two object files into the codegen data file. +RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata + +RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s +CHECK: Stable function map: +CHECK-NEXT: Unique hash Count: 1 +CHECK-NEXT: Total function Count: 2 +CHECK-NEXT: Mergeable function Count: 2 + +RUN: llvm-cgdata --convert %t/merge.cgdata | FileCheck %s --check-prefix=MAP +MAP: # Stable function map +MAP-NEXT: :stable_function_map +MAP-NEXT: --- +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func1 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 3 +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func2 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 4 +MAP-NEXT: ... + +;--- raw-1.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func2 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 4 +... 
+ +;--- merge-1-template.ll +@.data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge" + +;--- raw-2.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func1 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 3 +... + +;--- merge-2-template.ll +@.data = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge" diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test new file mode 100644 index 00000000000000..6a4e635f638657 --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test @@ -0,0 +1,36 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Test merge a single object file into a cgdata + +RUN: split-file %s %t + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-single.cgtext -o %t/raw-single.cgdata +RUN: od -t x1 -j 32 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt + +RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single-template.ll > %t/merge-single.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o + +# Merge an object file having cgdata (__llvm_merge) +RUN: llvm-cgdata -m %t/merge-single.o -o %t/merge-single.cgdata +RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s +CHECK: Stable function map: +CHECK-NEXT: Unique hash Count: 1 +CHECK-NEXT: Total function Count: 1 +CHECK-NEXT: Mergeable function Count: 0 + +;--- raw-single.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func1 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 3 +... 
+ +;--- merge-single-template.ll +@.data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge" diff --git a/llvm/test/tools/llvm-cgdata/merge-archive.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test similarity index 91% rename from llvm/test/tools/llvm-cgdata/merge-archive.test rename to llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test index 03eb9106b54562..ee6345247c5be6 100644 --- a/llvm/test/tools/llvm-cgdata/merge-archive.test +++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test @@ -5,15 +5,15 @@ RUN: split-file %s %t -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata -RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. 
RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata -RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o diff --git a/llvm/test/tools/llvm-cgdata/merge-concat.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test similarity index 93% rename from llvm/test/tools/llvm-cgdata/merge-concat.test rename to llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test index ac0e7a6e29e878..5a3ece05a3f990 100644 --- a/llvm/test/tools/llvm-cgdata/merge-concat.test +++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test @@ -5,13 +5,13 @@ RUN: split-file %s %t -# Synthesize two sets of raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize two sets of raw cgdata without the header (32 byte) from the indexed cgdata. 
# Concatenate them in merge-concat.ll RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata -RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat-template.ll > %t/merge-concat-template-2.ll RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata -RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-2.ll > %t/merge-concat.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o diff --git a/llvm/test/tools/llvm-cgdata/merge-double.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-double.test similarity index 90% rename from llvm/test/tools/llvm-cgdata/merge-double.test rename to llvm/test/tools/llvm-cgdata/merge-hashtree-double.test index 1ae8064291019e..044a8649cf4adf 100644 --- a/llvm/test/tools/llvm-cgdata/merge-double.test +++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-double.test @@ -5,15 +5,15 @@ RUN: split-file %s %t -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. 
RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata -RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata -RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o diff --git a/llvm/test/tools/llvm-cgdata/merge-single.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-single.test similarity index 92% rename from llvm/test/tools/llvm-cgdata/merge-single.test rename to llvm/test/tools/llvm-cgdata/merge-hashtree-single.test index 47e3cb3f4f50fb..829c63f0f17a2c 100644 --- a/llvm/test/tools/llvm-cgdata/merge-single.test +++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-single.test @@ -11,9 +11,9 @@ RUN: llvm-cgdata --merge %t/merge-empty.o --output %t/merge-empty.cgdata # No summary appear with the header only cgdata. RUN: llvm-cgdata --show %t/merge-empty.cgdata | count 0 -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. 
RUN: llvm-cgdata --convert --format binary %t/raw-single.cgtext -o %t/raw-single.cgdata -RUN: od -t x1 -j 24 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single-template.ll > %t/merge-single.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o diff --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp index 483f4662631284..18de1f6b14552a 100644 --- a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp +++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp @@ -80,8 +80,6 @@ static CGDataAction Action; static std::optional<CGDataFormat> OutputFormat; static std::vector<std::string> InputFilenames; -// TODO: Add a doc, https://llvm.org/docs/CommandGuide/llvm-cgdata.html - static void exitWithError(Twine Message, std::string Whence = "", std::string Hint = "") { WithColor::error(); @@ -128,6 +126,10 @@ static int convert_main(int argc, const char *argv[]) { OutlinedHashTreeRecord Record(Reader->releaseOutlinedHashTree()); Writer.addRecord(Record); } + if (Reader->hasStableFunctionMap()) { + StableFunctionMapRecord Record(Reader->releaseStableFunctionMap()); + Writer.addRecord(Record); + } if (OutputFormat == CGDataFormat::Text) { if (Error E = Writer.writeText(OS)) @@ -141,10 +143,12 @@ static int convert_main(int argc, const char *argv[]) { } static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer, - OutlinedHashTreeRecord &GlobalOutlineRecord); + OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord &GlobalFunctionMapRecord); static bool handleArchive(StringRef Filename, Archive &Arch, - OutlinedHashTreeRecord &GlobalOutlineRecord) { + OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord 
&GlobalFunctionMapRecord) { bool Result = true; Error Err = Error::success(); for (const auto &Child : Arch.children(Err)) { @@ -155,7 +159,8 @@ static bool handleArchive(StringRef Filename, Archive &Arch, if (Error E = NameOrErr.takeError()) exitWithError(std::move(E), Filename); std::string Name = (Filename + "(" + NameOrErr.get() + ")").str(); - Result &= handleBuffer(Name, BuffOrErr.get(), GlobalOutlineRecord); + Result &= handleBuffer(Name, BuffOrErr.get(), GlobalOutlineRecord, + GlobalFunctionMapRecord); } if (Err) exitWithError(std::move(Err), Filename); @@ -163,7 +168,8 @@ static bool handleArchive(StringRef Filename, Archive &Arch, } static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer, - OutlinedHashTreeRecord &GlobalOutlineRecord) { + OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord &GlobalFunctionMapRecord) { Expected<std::unique_ptr<object::Binary>> BinOrErr = object::createBinary(Buffer); if (Error E = BinOrErr.takeError()) @@ -171,11 +177,12 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer, bool Result = true; if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) { - if (Error E = - CodeGenDataReader::mergeFromObjectFile(Obj, GlobalOutlineRecord)) + if (Error E = CodeGenDataReader::mergeFromObjectFile( + Obj, GlobalOutlineRecord, GlobalFunctionMapRecord)) exitWithError(std::move(E), Filename); } else if (auto *Arch = dyn_cast<Archive>(BinOrErr->get())) { - Result &= handleArchive(Filename, *Arch, GlobalOutlineRecord); + Result &= handleArchive(Filename, *Arch, GlobalOutlineRecord, + GlobalFunctionMapRecord); } else { // TODO: Support for the MachO universal binary format. 
errs() << "Error: unsupported binary file: " << Filename << "\n"; @@ -186,19 +193,23 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer, } static bool handleFile(StringRef Filename, - OutlinedHashTreeRecord &GlobalOutlineRecord) { + OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord &GlobalFunctionMapRecord) { ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = BuffOrErr.getError()) exitWithErrorCode(EC, Filename); - return handleBuffer(Filename, *BuffOrErr.get(), GlobalOutlineRecord); + return handleBuffer(Filename, *BuffOrErr.get(), GlobalOutlineRecord, + GlobalFunctionMapRecord); } static int merge_main(int argc, const char *argv[]) { bool Result = true; OutlinedHashTreeRecord GlobalOutlineRecord; + StableFunctionMapRecord GlobalFunctionMapRecord; for (auto &Filename : InputFilenames) - Result &= handleFile(Filename, GlobalOutlineRecord); + Result &= + handleFile(Filename, GlobalOutlineRecord, GlobalFunctionMapRecord); if (!Result) exitWithError("failed to merge codegen data files."); @@ -206,6 +217,8 @@ static int merge_main(int argc, const char *argv[]) { CodeGenDataWriter Writer; if (!GlobalOutlineRecord.empty()) Writer.addRecord(GlobalOutlineRecord); + if (!GlobalFunctionMapRecord.empty()) + Writer.addRecord(GlobalFunctionMapRecord); std::error_code EC; raw_fd_ostream OS(OutputFilename, EC, @@ -249,6 +262,15 @@ static int show_main(int argc, const char *argv[]) { << "\n"; OS << " Depth: " << Tree->depth() << "\n"; } + if (Reader->hasStableFunctionMap()) { + auto Map = Reader->releaseStableFunctionMap(); + OS << "Stable function map:\n"; + OS << " Unique hash Count: " << Map->size() << "\n"; + OS << " Total function Count: " + << Map->size(StableFunctionMap::TotalFunctionCount) << "\n"; + OS << " Mergeable function Count: " + << Map->size(StableFunctionMap::MergeableFunctionCount) << "\n"; + } return 0; } _______________________________________________ 
llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits