[llvm-branch-commits] [BOLT][NFC] Speedup BAT::writeMaps (PR #112061)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Oct 11 17:03:35 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

For a large binary with a 38 MB BAT section containing ~170k maps, this
reduces writeMaps time from 70s to 1s.

The inefficiency came from calling std::distance on std::map iterators,
which are not random-access, so each call walks the map linearly. Use a
sorted vector of hot function addresses for the lookups instead.

Test Plan: NFC


---
Full diff: https://github.com/llvm/llvm-project/pull/112061.diff


2 Files Affected:

- (modified) bolt/include/bolt/Profile/BoltAddressTranslation.h (+6-5) 
- (modified) bolt/lib/Profile/BoltAddressTranslation.cpp (+10-12) 


``````````diff
diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h
index 2d920a114fea2e..0b3e41f61b3942 100644
--- a/bolt/include/bolt/Profile/BoltAddressTranslation.h
+++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h
@@ -143,15 +143,13 @@ class BoltAddressTranslation {
   void constructMaps(const BinaryContext &BC);
 
   /// Write the serialized address translation table for a function.
-  template <bool Cold>
-  void writeMaps(std::map<uint64_t, MapTy> &Maps, uint64_t &PrevAddress,
-                 raw_ostream &OS);
+  template <bool Cold> void writeMaps(uint64_t &PrevAddress, raw_ostream &OS);
 
   /// Read the serialized address translation table for a function.
   /// Return a parse error if failed.
   template <bool Cold>
-  void parseMaps(std::vector<uint64_t> &HotFuncs, uint64_t &PrevAddress,
-                 DataExtractor &DE, uint64_t &Offset, Error &Err);
+  void parseMaps(uint64_t &PrevAddress, DataExtractor &DE, uint64_t &Offset,
+                 Error &Err);
 
   /// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY
   /// entries in function address translation map.
@@ -163,6 +161,9 @@ class BoltAddressTranslation {
 
   std::map<uint64_t, MapTy> Maps;
 
+  /// Ordered vector with addresses of hot functions.
+  std::vector<uint64_t> HotFuncs;
+
   /// Map a function to its basic blocks count
   std::unordered_map<uint64_t, size_t> NumBasicBlocksMap;
 
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
index 334252cbd36026..c661782174cd75 100644
--- a/bolt/lib/Profile/BoltAddressTranslation.cpp
+++ b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -159,8 +159,8 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
 
   // Output addresses are delta-encoded
   uint64_t PrevAddress = 0;
-  writeMaps</*Cold=*/false>(Maps, PrevAddress, OS);
-  writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);
+  writeMaps</*Cold=*/false>(PrevAddress, OS);
+  writeMaps</*Cold=*/true>(PrevAddress, OS);
 
   BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
   BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.getNumFunctions()
@@ -198,8 +198,7 @@ size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map,
 }
 
 template <bool Cold>
-void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
-                                       uint64_t &PrevAddress, raw_ostream &OS) {
+void BoltAddressTranslation::writeMaps(uint64_t &PrevAddress, raw_ostream &OS) {
   NamedRegionTimer T("writemaps", "write translation maps", "bat",
                      "process BAT", opts::TimeBAT);
   const uint32_t NumFuncs =
@@ -231,9 +230,9 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
             : 0;
     uint32_t Skew = 0;
     if (Cold) {
-      auto HotEntryIt = Maps.find(ColdPartSource[Address]);
-      assert(HotEntryIt != Maps.end());
-      size_t HotIndex = std::distance(Maps.begin(), HotEntryIt);
+      auto HotEntryIt = llvm::lower_bound(HotFuncs, ColdPartSource[Address]);
+      assert(HotEntryIt != HotFuncs.end());
+      size_t HotIndex = std::distance(HotFuncs.begin(), HotEntryIt);
       encodeULEB128(HotIndex - PrevIndex, OS);
       PrevIndex = HotIndex;
       // Skew of all input offsets for cold fragments is simply the first input
@@ -241,6 +240,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
       Skew = Map.begin()->second >> 1;
       encodeULEB128(Skew, OS);
     } else {
+      HotFuncs.push_back(Address);
       // Function hash
       size_t BFHash = getBFHash(HotInputAddress);
       LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", BFHash));
@@ -329,17 +329,15 @@ std::error_code BoltAddressTranslation::parse(raw_ostream &OS, StringRef Buf) {
     return make_error_code(llvm::errc::io_error);
 
   Error Err(Error::success());
-  std::vector<uint64_t> HotFuncs;
   uint64_t PrevAddress = 0;
-  parseMaps</*Cold=*/false>(HotFuncs, PrevAddress, DE, Offset, Err);
-  parseMaps</*Cold=*/true>(HotFuncs, PrevAddress, DE, Offset, Err);
+  parseMaps</*Cold=*/false>(PrevAddress, DE, Offset, Err);
+  parseMaps</*Cold=*/true>(PrevAddress, DE, Offset, Err);
   OS << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
   return errorToErrorCode(std::move(Err));
 }
 
 template <bool Cold>
-void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
-                                       uint64_t &PrevAddress, DataExtractor &DE,
+void BoltAddressTranslation::parseMaps(uint64_t &PrevAddress, DataExtractor &DE,
                                        uint64_t &Offset, Error &Err) {
   const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
   LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << (Cold ? " cold" : "")

``````````

</details>


https://github.com/llvm/llvm-project/pull/112061


More information about the llvm-branch-commits mailing list