llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) <details> <summary>Changes</summary> For a large binary with BAT section of size 38 MB with ~170k maps, reduces writeMaps time from 70s down to 1s. The inefficiency was in the use of std::distance with std::map::iterator which doesn't provide random access. Use sorted vector for lookups. Test Plan: NFC --- Full diff: https://github.com/llvm/llvm-project/pull/112061.diff 2 Files Affected: - (modified) bolt/include/bolt/Profile/BoltAddressTranslation.h (+6-5) - (modified) bolt/lib/Profile/BoltAddressTranslation.cpp (+10-12) ``````````diff diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h index 2d920a114fea2e..0b3e41f61b3942 100644 --- a/bolt/include/bolt/Profile/BoltAddressTranslation.h +++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h @@ -143,15 +143,13 @@ class BoltAddressTranslation { void constructMaps(const BinaryContext &BC); /// Write the serialized address translation table for a function. - template <bool Cold> - void writeMaps(std::map<uint64_t, MapTy> &Maps, uint64_t &PrevAddress, - raw_ostream &OS); + template <bool Cold> void writeMaps(uint64_t &PrevAddress, raw_ostream &OS); /// Read the serialized address translation table for a function. /// Return a parse error if failed. template <bool Cold> - void parseMaps(std::vector<uint64_t> &HotFuncs, uint64_t &PrevAddress, - DataExtractor &DE, uint64_t &Offset, Error &Err); + void parseMaps(uint64_t &PrevAddress, DataExtractor &DE, uint64_t &Offset, + Error &Err); /// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY /// entries in function address translation map. @@ -163,6 +161,9 @@ class BoltAddressTranslation { std::map<uint64_t, MapTy> Maps; + /// Ordered vector with addresses of hot functions. + std::vector<uint64_t> HotFuncs; + /// Map a function to its basic blocks count std::unordered_map<uint64_t, size_t> NumBasicBlocksMap; diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp index 334252cbd36026..c661782174cd75 100644 --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -159,8 +159,8 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) { // Output addresses are delta-encoded uint64_t PrevAddress = 0; - writeMaps</*Cold=*/false>(Maps, PrevAddress, OS); - writeMaps</*Cold=*/true>(Maps, PrevAddress, OS); + writeMaps</*Cold=*/false>(PrevAddress, OS); + writeMaps</*Cold=*/true>(PrevAddress, OS); BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n"; BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.getNumFunctions() @@ -198,8 +198,7 @@ size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map, } template <bool Cold> -void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps, - uint64_t &PrevAddress, raw_ostream &OS) { +void BoltAddressTranslation::writeMaps(uint64_t &PrevAddress, raw_ostream &OS) { NamedRegionTimer T("writemaps", "write translation maps", "bat", "process BAT", opts::TimeBAT); const uint32_t NumFuncs = @@ -231,9 +230,9 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps, : 0; uint32_t Skew = 0; if (Cold) { - auto HotEntryIt = Maps.find(ColdPartSource[Address]); - assert(HotEntryIt != Maps.end()); - size_t HotIndex = std::distance(Maps.begin(), HotEntryIt); + auto HotEntryIt = llvm::lower_bound(HotFuncs, ColdPartSource[Address]); + assert(HotEntryIt != HotFuncs.end()); + size_t HotIndex = std::distance(HotFuncs.begin(), HotEntryIt); encodeULEB128(HotIndex - PrevIndex, OS); PrevIndex = HotIndex; // Skew of all input offsets for cold fragments is simply the first input @@ -241,6 +240,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps, Skew = Map.begin()->second >> 1; encodeULEB128(Skew, OS); } else { + HotFuncs.push_back(Address); // Function hash size_t BFHash = getBFHash(HotInputAddress); LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", BFHash)); @@ -329,17 +329,15 @@ std::error_code BoltAddressTranslation::parse(raw_ostream &OS, StringRef Buf) { return make_error_code(llvm::errc::io_error); Error Err(Error::success()); - std::vector<uint64_t> HotFuncs; uint64_t PrevAddress = 0; - parseMaps</*Cold=*/false>(HotFuncs, PrevAddress, DE, Offset, Err); - parseMaps</*Cold=*/true>(HotFuncs, PrevAddress, DE, Offset, Err); + parseMaps</*Cold=*/false>(PrevAddress, DE, Offset, Err); + parseMaps</*Cold=*/true>(PrevAddress, DE, Offset, Err); OS << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n"; return errorToErrorCode(std::move(Err)); } template <bool Cold> -void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs, - uint64_t &PrevAddress, DataExtractor &DE, +void BoltAddressTranslation::parseMaps(uint64_t &PrevAddress, DataExtractor &DE, uint64_t &Offset, Error &Err) { const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err); LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "") `````````` </details> https://github.com/llvm/llvm-project/pull/112061 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits