[llvm] [BOLT][NFC] Refactor BAT metadata data structures (PR #86353)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 22 15:45:32 PDT 2024


https://github.com/aaupov created https://github.com/llvm/llvm-project/pull/86353

Hide the implementations of `FuncHashes` and `BBHashMap` behind dedicated
wrapper classes, replacing the `at` accessors, which could throw an exception,
with lookups that assert the entry exists.

Test Plan: NFC


>From 12a5c6c5bb68bf749b4dcb3cf90669ca6b056a8c Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Fri, 22 Mar 2024 15:45:21 -0700
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 .../bolt/Profile/BoltAddressTranslation.h     | 125 +++++++++++++++---
 bolt/lib/Profile/BoltAddressTranslation.cpp   |  52 +++-----
 2 files changed, 128 insertions(+), 49 deletions(-)

diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h
index d583ce0b76a246..0ae556bb0e54cf 100644
--- a/bolt/include/bolt/Profile/BoltAddressTranslation.h
+++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h
@@ -115,17 +115,6 @@ class BoltAddressTranslation {
   /// Save function and basic block hashes used for metadata dump.
   void saveMetadata(BinaryContext &BC);
 
-  /// Returns BB hash by function output address (after BOLT) and basic block
-  /// input offset.
-  size_t getBBHash(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;
-
-  /// Returns BF hash by function output address (after BOLT).
-  size_t getBFHash(uint64_t OutputAddress) const;
-
-  /// Returns BB index by function output address (after BOLT) and basic block
-  /// input offset.
-  unsigned getBBIndex(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;
-
   /// True if a given \p Address is a function with translation table entry.
   bool isBATFunction(uint64_t Address) const { return Maps.count(Address); }
 
@@ -158,10 +147,6 @@ class BoltAddressTranslation {
 
   std::map<uint64_t, MapTy> Maps;
 
-  /// Map basic block input offset to a basic block index and hash pair.
-  using BBHashMap = std::unordered_map<uint32_t, std::pair<unsigned, size_t>>;
-  std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;
-
   /// Map a function to its basic blocks count
   std::unordered_map<uint64_t, size_t> NumBasicBlocksMap;
 
@@ -174,6 +159,116 @@ class BoltAddressTranslation {
   /// Identifies the address of a control-flow changing instructions in a
   /// translation map entry
   const static uint32_t BRANCHENTRY = 0x1;
+
+  class BBHashMapEntryTy {
+    unsigned Index;
+    size_t Hash;
+
+  public:
+    unsigned getBBIndex() const { return Index; }
+    size_t getBBHash() const { return Hash; }
+    BBHashMapEntryTy(unsigned Index, size_t Hash) : Index(Index), Hash(Hash) {}
+  };
+
+public:
+  /// Map basic block input offset to a basic block index and hash pair.
+  class BBHashMapTy : std::unordered_map<uint32_t, BBHashMapEntryTy> {
+    const BBHashMapEntryTy &getEntry(uint32_t BBInputOffset) const {
+      auto It = find(BBInputOffset);
+      assert(It != end());
+      return It->second;
+    }
+
+  public:
+    unsigned getBBIndex(uint32_t BBInputOffset) const {
+      return getEntry(BBInputOffset).getBBIndex();
+    }
+
+    size_t getBBHash(uint32_t BBInputOffset) const {
+      return getEntry(BBInputOffset).getBBHash();
+    }
+
+    void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) {
+      emplace(BBInputOffset, BBHashMapEntryTy(BBIndex, BBHash));
+    }
+
+    size_t getNumBasicBlocks() const { return size(); }
+  };
+
+private:
+  class FuncHashEntryTy {
+    size_t Hash;
+    BBHashMapTy BBHashMap;
+
+  public:
+    size_t getBFHash() const { return Hash; }
+    const BBHashMapTy &getBBHashMap() const { return BBHashMap; }
+    FuncHashEntryTy(size_t Hash) : Hash(Hash) {}
+  };
+
+public:
+  /// Map function output address to its hash and basic blocks hash map.
+  class FuncHashesTy {
+    std::unordered_map<uint64_t, FuncHashEntryTy> Map;
+    const FuncHashEntryTy &getEntry(uint64_t FuncOutputAddress) const {
+      auto It = Map.find(FuncOutputAddress);
+      assert(It != Map.end());
+      return It->second;
+    }
+
+  public:
+    size_t getBFHash(uint64_t FuncOutputAddress) const {
+      return getEntry(FuncOutputAddress).getBFHash();
+    }
+
+    const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
+      return getEntry(FuncOutputAddress).getBBHashMap();
+    }
+
+    void addEntry(uint64_t FuncOutputAddress, size_t BFHash) {
+      Map.emplace(FuncOutputAddress, FuncHashEntryTy(BFHash));
+    }
+
+    size_t getNumFunctions() const { return Map.size(); };
+
+    size_t getNumBasicBlocks() const {
+      size_t NumBasicBlocks{0};
+      for (auto &I : Map)
+        NumBasicBlocks += I.second.getBBHashMap().getNumBasicBlocks();
+      return NumBasicBlocks;
+    }
+  };
+
+  /// Returns BF hash by function output address (after BOLT).
+  size_t getBFHash(uint64_t FuncOutputAddress) const {
+    return FuncHashes.getBFHash(FuncOutputAddress);
+  }
+
+  /// Returns BBHashMap by function output address (after BOLT).
+  const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
+    return FuncHashes.getBBHashMap(FuncOutputAddress);
+  }
+
+  BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) {
+    return const_cast<BBHashMapTy &>(
+        std::as_const(*this).getBBHashMap(FuncOutputAddress));
+  }
+
+  /// Returns BB index by function output address (after BOLT) and basic block
+  /// input offset.
+  unsigned getBBIndex(uint64_t FuncOutputAddress,
+                      uint32_t BBInputOffset) const {
+    return getBBHashMap(FuncOutputAddress).getBBIndex(BBInputOffset);
+  }
+
+  /// Returns BB hash by function output address (after BOLT) and basic block
+  /// input offset.
+  size_t getBBHash(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const {
+    return getBBHashMap(FuncOutputAddress).getBBHash(BBInputOffset);
+  }
+
+private:
+  FuncHashesTy FuncHashes;
 };
 } // namespace bolt
 
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
index 31886f4c802538..bccb1e9d4a416c 100644
--- a/bolt/lib/Profile/BoltAddressTranslation.cpp
+++ b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -125,11 +125,9 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
   writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);
 
   BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
-  const uint64_t NumBBHashes = std::accumulate(
-      FuncHashes.begin(), FuncHashes.end(), 0ull,
-      [](size_t Acc, const auto &B) { return Acc + B.second.second.size(); });
-  BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.size() << " function and "
-            << NumBBHashes << " basic block hashes\n";
+  BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.getNumFunctions()
+            << " function and " << FuncHashes.getNumBasicBlocks()
+            << " basic block hashes\n";
 }
 
 APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
@@ -176,11 +174,10 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
     // Only process cold fragments in cold mode, and vice versa.
     if (Cold != ColdPartSource.count(Address))
       continue;
-    // NB: here we use the input address because hashes are saved early (in
-    // `saveMetadata`) before output addresses are assigned.
+    // NB: in `writeMaps` we use the input address because hashes are saved
+    // early in `saveMetadata` before output addresses are assigned.
     const uint64_t HotInputAddress =
         ReverseMap[Cold ? ColdPartSource[Address] : Address];
-    std::pair<size_t, BBHashMap> &FuncHashPair = FuncHashes[HotInputAddress];
     MapTy &Map = MapEntry.second;
     const uint32_t NumEntries = Map.size();
     LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
@@ -194,10 +191,11 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
       PrevIndex = HotIndex;
     } else {
       // Function hash
-      LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", FuncHashPair.first));
-      OS.write(reinterpret_cast<char *>(&FuncHashPair.first), 8);
+      size_t BFHash = getBFHash(HotInputAddress);
+      LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", BFHash));
+      OS.write(reinterpret_cast<char *>(&BFHash), 8);
       // Number of basic blocks
-      size_t NumBasicBlocks = FuncHashPair.second.size();
+      size_t NumBasicBlocks = getBBHashMap(HotInputAddress).getNumBasicBlocks();
       LLVM_DEBUG(dbgs() << "Basic blocks: " << NumBasicBlocks << '\n');
       encodeULEB128(NumBasicBlocks, OS);
     }
@@ -233,9 +231,9 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
         encodeSLEB128(KeyVal.second - InOffset, OS);
       InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
       if ((InOffset & BRANCHENTRY) == 0) {
-        unsigned BBIndex;
-        size_t BBHash;
-        std::tie(BBIndex, BBHash) = FuncHashPair.second[InOffset >> 1];
+        const BBHashMapTy &BBHashMap = getBBHashMap(HotInputAddress);
+        unsigned BBIndex = BBHashMap.getBBIndex(InOffset >> 1);
+        size_t BBHash = BBHashMap.getBBHash(InOffset >> 1);
         OS.write(reinterpret_cast<char *>(&BBHash), 8);
         // Basic block index in the input binary
         encodeULEB128(BBIndex - PrevBBIndex, OS);
@@ -295,7 +293,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
       HotFuncs.push_back(Address);
       // Function hash
       const size_t FuncHash = DE.getU64(&Offset, &Err);
-      FuncHashes[Address].first = FuncHash;
+      FuncHashes.addEntry(Address, FuncHash);
       LLVM_DEBUG(dbgs() << formatv("{0:x}: hash {1:x}\n", Address, FuncHash));
       // Number of basic blocks
       const size_t NumBasicBlocks = DE.getULEB128(&Offset, &Err);
@@ -355,8 +353,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
         BBIndexDelta = DE.getULEB128(&Offset, &Err);
         BBIndex += BBIndexDelta;
         // Map basic block hash to hot fragment by input offset
-        FuncHashes[HotAddress].second.emplace(InputOffset >> 1,
-                                              std::pair(BBIndex, BBHash));
+        getBBHashMap(HotAddress).addEntry(InputOffset >> 1, BBIndex, BBHash);
       }
       LLVM_DEBUG({
         dbgs() << formatv(
@@ -515,27 +512,14 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
     if (BF.isIgnored() || (!BC.HasRelocations && !BF.isSimple()))
       continue;
     // Prepare function and block hashes
-    FuncHashes[BF.getAddress()].first = BF.computeHash();
+    FuncHashes.addEntry(BF.getAddress(), BF.computeHash());
     BF.computeBlockHashes();
+    BBHashMapTy &BBHashMap = getBBHashMap(BF.getAddress());
+    // Set BF/BB metadata
     for (const BinaryBasicBlock &BB : BF)
-      FuncHashes[BF.getAddress()].second.emplace(
-          BB.getInputOffset(), std::pair(BB.getIndex(), BB.getHash()));
+      BBHashMap.addEntry(BB.getInputOffset(), BB.getIndex(), BB.getHash());
   }
 }
 
-unsigned BoltAddressTranslation::getBBIndex(uint64_t FuncOutputAddress,
-                                            uint32_t BBInputOffset) const {
-  return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).first;
-}
-
-size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
-                                         uint32_t BBInputOffset) const {
-  return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).second;
-}
-
-size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {
-  return FuncHashes.at(OutputAddress).first;
-}
-
 } // namespace bolt
 } // namespace llvm



More information about the llvm-commits mailing list