[llvm-branch-commits] [BOLT][NFC] Speedup BAT::writeMaps (PR #112061)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Oct 11 17:03:35 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
For a large binary with a 38 MB BAT section containing ~170k maps, this
change reduces writeMaps time from 70s down to 1s.
The inefficiency was in the use of std::distance with std::map::iterator,
which doesn't provide random access, so each distance computation is linear
in the number of maps. Use a sorted vector for lookups instead.
Test Plan: NFC
---
Full diff: https://github.com/llvm/llvm-project/pull/112061.diff
2 Files Affected:
- (modified) bolt/include/bolt/Profile/BoltAddressTranslation.h (+6-5)
- (modified) bolt/lib/Profile/BoltAddressTranslation.cpp (+10-12)
``````````diff
diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h
index 2d920a114fea2e..0b3e41f61b3942 100644
--- a/bolt/include/bolt/Profile/BoltAddressTranslation.h
+++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h
@@ -143,15 +143,13 @@ class BoltAddressTranslation {
void constructMaps(const BinaryContext &BC);
/// Write the serialized address translation table for a function.
- template <bool Cold>
- void writeMaps(std::map<uint64_t, MapTy> &Maps, uint64_t &PrevAddress,
- raw_ostream &OS);
+ template <bool Cold> void writeMaps(uint64_t &PrevAddress, raw_ostream &OS);
/// Read the serialized address translation table for a function.
/// Return a parse error if failed.
template <bool Cold>
- void parseMaps(std::vector<uint64_t> &HotFuncs, uint64_t &PrevAddress,
- DataExtractor &DE, uint64_t &Offset, Error &Err);
+ void parseMaps(uint64_t &PrevAddress, DataExtractor &DE, uint64_t &Offset,
+ Error &Err);
/// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY
/// entries in function address translation map.
@@ -163,6 +161,9 @@ class BoltAddressTranslation {
std::map<uint64_t, MapTy> Maps;
+ /// Ordered vector with addresses of hot functions.
+ std::vector<uint64_t> HotFuncs;
+
/// Map a function to its basic blocks count
std::unordered_map<uint64_t, size_t> NumBasicBlocksMap;
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
index 334252cbd36026..c661782174cd75 100644
--- a/bolt/lib/Profile/BoltAddressTranslation.cpp
+++ b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -159,8 +159,8 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
// Output addresses are delta-encoded
uint64_t PrevAddress = 0;
- writeMaps</*Cold=*/false>(Maps, PrevAddress, OS);
- writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);
+ writeMaps</*Cold=*/false>(PrevAddress, OS);
+ writeMaps</*Cold=*/true>(PrevAddress, OS);
BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.getNumFunctions()
@@ -198,8 +198,7 @@ size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map,
}
template <bool Cold>
-void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
- uint64_t &PrevAddress, raw_ostream &OS) {
+void BoltAddressTranslation::writeMaps(uint64_t &PrevAddress, raw_ostream &OS) {
NamedRegionTimer T("writemaps", "write translation maps", "bat",
"process BAT", opts::TimeBAT);
const uint32_t NumFuncs =
@@ -231,9 +230,9 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
: 0;
uint32_t Skew = 0;
if (Cold) {
- auto HotEntryIt = Maps.find(ColdPartSource[Address]);
- assert(HotEntryIt != Maps.end());
- size_t HotIndex = std::distance(Maps.begin(), HotEntryIt);
+ auto HotEntryIt = llvm::lower_bound(HotFuncs, ColdPartSource[Address]);
+ assert(HotEntryIt != HotFuncs.end());
+ size_t HotIndex = std::distance(HotFuncs.begin(), HotEntryIt);
encodeULEB128(HotIndex - PrevIndex, OS);
PrevIndex = HotIndex;
// Skew of all input offsets for cold fragments is simply the first input
@@ -241,6 +240,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
Skew = Map.begin()->second >> 1;
encodeULEB128(Skew, OS);
} else {
+ HotFuncs.push_back(Address);
// Function hash
size_t BFHash = getBFHash(HotInputAddress);
LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", BFHash));
@@ -329,17 +329,15 @@ std::error_code BoltAddressTranslation::parse(raw_ostream &OS, StringRef Buf) {
return make_error_code(llvm::errc::io_error);
Error Err(Error::success());
- std::vector<uint64_t> HotFuncs;
uint64_t PrevAddress = 0;
- parseMaps</*Cold=*/false>(HotFuncs, PrevAddress, DE, Offset, Err);
- parseMaps</*Cold=*/true>(HotFuncs, PrevAddress, DE, Offset, Err);
+ parseMaps</*Cold=*/false>(PrevAddress, DE, Offset, Err);
+ parseMaps</*Cold=*/true>(PrevAddress, DE, Offset, Err);
OS << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
return errorToErrorCode(std::move(Err));
}
template <bool Cold>
-void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
- uint64_t &PrevAddress, DataExtractor &DE,
+void BoltAddressTranslation::parseMaps(uint64_t &PrevAddress, DataExtractor &DE,
uint64_t &Offset, Error &Err) {
const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "")
``````````
</details>
https://github.com/llvm/llvm-project/pull/112061
More information about the llvm-branch-commits
mailing list