[llvm-branch-commits] [llvm] [BOLT] Match functions with call graph (PR #98125)

Shaw Young via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jul 9 13:52:14 PDT 2024


https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/98125

From cf32a43e7c2b04079c6123fe13df4fb7226d771f Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 9 Jul 2024 10:04:25 -0700
Subject: [PATCH 1/2] Comments

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 69ea0899c5f2c..6753337c24ea7 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -501,7 +501,6 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
 
   // Maps binary functions to adjacent functions in the FCG.
   for (const BinaryFunction *CallerBF : BFs) {
-    // Add all call targets to the hash map.
     for (const BinaryBasicBlock &BB : CallerBF->blocks()) {
       for (const MCInst &Inst : BB) {
         if (!BC.MIB->isCall(Instr))
@@ -533,7 +532,8 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
     }
   }
 
-  // Create mapping from neighbor hash to BFs.
+  // Using the constructed adjacent-function mapping, creates a mapping from
+  // neighbor hash to BFs.
   std::unordered_map<uint64_t, std::vector<const BinaryFunction *>>
       NeighborHashToBFs;
   for (const BinaryFunction *BF : BFs) {
@@ -552,12 +552,12 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
         .push_back(BF);
   }
 
-  // TODO: change call anchor PR to have this representation - we need it here
+  // TODO: this mapping will be introduced in the matching functions with calls
+  // as anchors PR.
   DenseMap<uint32_t, const yaml::bolt::BinaryFunctionProfile * YamlBF>
       IdToYAMLBF;
-  // TODO: change call anchor PR to have this representation - we need it here
 
-  // Maps hashes to profiled functions.
+  // Maps YAML functions to adjacent functions in the profile FCG.
   std::unordered_map<const yaml::bolt::BinaryFunctionProfile * YamlBF,
                      FunctionHashes>
       YamlBFToHashes(BFs.size());
@@ -590,7 +590,7 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
     }
   }
 
-  // Matching YAMLBF with neighbor hashes.
+  // Matches YAMLBF to BFs with neighbor hashes.
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
     if (YamlBF.Used)
       continue;

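For orientation, here is a minimal standalone sketch of the matching scheme the new comments describe (illustrative names only, not the PR's code): each function's call-graph neighbors contribute their loose hashes, which are sorted, concatenated, and hashed into a single "neighbor hash" used to bucket binary functions and look up candidate matches for profile functions.

    #include <algorithm>
    #include <cstdint>
    #include <functional>
    #include <string>
    #include <unordered_map>
    #include <vector>

    using FuncId = uint64_t;

    // Combine the sorted loose hashes of a function's call-graph neighbors
    // into one "neighbor hash" usable as a matching key.
    static uint64_t neighborHash(std::vector<uint64_t> AdjacentHashes) {
      std::sort(AdjacentHashes.begin(), AdjacentHashes.end());
      std::string Key;
      for (uint64_t H : AdjacentHashes)
        Key += std::to_string(H);
      return std::hash<std::string>{}(Key);
    }

    // Bucket functions by neighbor hash; profile functions that produce the
    // same neighbor hash become matching candidates.
    static std::unordered_map<uint64_t, std::vector<FuncId>>
    bucketByNeighborHash(
        const std::unordered_map<FuncId, std::vector<uint64_t>> &FuncToAdjHashes) {
      std::unordered_map<uint64_t, std::vector<FuncId>> Buckets;
      for (const auto &[Func, AdjHashes] : FuncToAdjHashes)
        Buckets[neighborHash(AdjHashes)].push_back(Func);
      return Buckets;
    }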
From ee9049fc4bd3d4203c19c9c0982a78ab3b47666f Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 9 Jul 2024 13:52:05 -0700
Subject: [PATCH 2/2] Moved blended hash definition

Created using spr 1.3.4
---
 bolt/include/bolt/Profile/YAMLProfileReader.h |  69 ++++++++++-
 bolt/lib/Profile/StaleProfileMatching.cpp     |  65 -----------
 bolt/lib/Profile/YAMLProfileReader.cpp        | 110 ++++++++----------
 3 files changed, 119 insertions(+), 125 deletions(-)

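Background for the hash computation rewritten in YAMLProfileReader.cpp below (an illustrative sketch, not the PR's API; the helper name is made up): the loose function hash concatenates each basic block's 16-bit opcode hash as a decimal string and hashes the resulting string.

    #include <cstdint>
    #include <functional>
    #include <string>
    #include <vector>

    // Loose (opcode-only) function hash: concatenate per-block opcode hashes
    // as decimal strings, then hash the concatenation.
    static uint64_t
    looseFunctionHash(const std::vector<uint16_t> &BlockOpcodeHashes) {
      std::string FunctionHashStr;
      for (uint16_t OpcodeHash : BlockOpcodeHashes)
        FunctionHashStr.append(std::to_string(OpcodeHash));
      return std::hash<std::string>{}(FunctionHashStr);
    }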
diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h
index 36e8f8739eee1..e8a34ecad9a08 100644
--- a/bolt/include/bolt/Profile/YAMLProfileReader.h
+++ b/bolt/include/bolt/Profile/YAMLProfileReader.h
@@ -16,6 +16,73 @@
 namespace llvm {
 namespace bolt {
 
+/// An object wrapping several components of a basic block hash. The combined
+/// (blended) hash is represented and stored as one uint64_t, while individual
+/// components are of smaller size (e.g., uint16_t or uint8_t).
+struct BlendedBlockHash {
+private:
+  using ValueOffset = Bitfield::Element<uint16_t, 0, 16>;
+  using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>;
+  using ValueInstr = Bitfield::Element<uint16_t, 32, 16>;
+  using ValuePred = Bitfield::Element<uint8_t, 48, 8>;
+  using ValueSucc = Bitfield::Element<uint8_t, 56, 8>;
+
+public:
+  explicit BlendedBlockHash() {}
+
+  explicit BlendedBlockHash(uint64_t Hash) {
+    Offset = Bitfield::get<ValueOffset>(Hash);
+    OpcodeHash = Bitfield::get<ValueOpcode>(Hash);
+    InstrHash = Bitfield::get<ValueInstr>(Hash);
+    PredHash = Bitfield::get<ValuePred>(Hash);
+    SuccHash = Bitfield::get<ValueSucc>(Hash);
+  }
+
+  /// Combine the blended hash into uint64_t.
+  uint64_t combine() const {
+    uint64_t Hash = 0;
+    Bitfield::set<ValueOffset>(Hash, Offset);
+    Bitfield::set<ValueOpcode>(Hash, OpcodeHash);
+    Bitfield::set<ValueInstr>(Hash, InstrHash);
+    Bitfield::set<ValuePred>(Hash, PredHash);
+    Bitfield::set<ValueSucc>(Hash, SuccHash);
+    return Hash;
+  }
+
+  /// Compute a distance between two given blended hashes. The smaller the
+  /// distance, the more similar two blocks are. For identical basic blocks,
+  /// the distance is zero.
+  uint64_t distance(const BlendedBlockHash &BBH) const {
+    assert(OpcodeHash == BBH.OpcodeHash &&
+           "incorrect blended hash distance computation");
+    uint64_t Dist = 0;
+    // Account for NeighborHash
+    Dist += SuccHash == BBH.SuccHash ? 0 : 1;
+    Dist += PredHash == BBH.PredHash ? 0 : 1;
+    Dist <<= 16;
+    // Account for InstrHash
+    Dist += InstrHash == BBH.InstrHash ? 0 : 1;
+    Dist <<= 16;
+    // Account for Offset
+    Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
+    return Dist;
+  }
+
+  /// The offset of the basic block from the function start.
+  uint16_t Offset{0};
+  /// (Loose) Hash of the basic block instructions, excluding operands.
+  uint16_t OpcodeHash{0};
+  /// (Strong) Hash of the basic block instructions, including opcodes and
+  /// operands.
+  uint16_t InstrHash{0};
+  /// (Loose) Hashes of the predecessors of the basic block.
+  uint8_t PredHash{0};
+  /// (Loose) Hashes of the successors of the basic block.
+  uint8_t SuccHash{0};
+};
+
+struct CallGraphMatcher {};
+
 class YAMLProfileReader : public ProfileReaderBase {
 public:
   explicit YAMLProfileReader(StringRef Filename)
@@ -92,7 +159,7 @@ class YAMLProfileReader : public ProfileReaderBase {
 
   /// Matches functions using exact hash.
   size_t matchWithHash(BinaryContext &BC);
-  
+
   /// Matches functions using the call graph.
   size_t matchWithCallGraph(BinaryContext &BC);
 
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index e473beb2fad8c..9631ac176554e 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -121,71 +121,6 @@ cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
 namespace llvm {
 namespace bolt {
 
-/// An object wrapping several components of a basic block hash. The combined
-/// (blended) hash is represented and stored as one uint64_t, while individual
-/// components are of smaller size (e.g., uint16_t or uint8_t).
-struct BlendedBlockHash {
-private:
-  using ValueOffset = Bitfield::Element<uint16_t, 0, 16>;
-  using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>;
-  using ValueInstr = Bitfield::Element<uint16_t, 32, 16>;
-  using ValuePred = Bitfield::Element<uint8_t, 48, 8>;
-  using ValueSucc = Bitfield::Element<uint8_t, 56, 8>;
-
-public:
-  explicit BlendedBlockHash() {}
-
-  explicit BlendedBlockHash(uint64_t Hash) {
-    Offset = Bitfield::get<ValueOffset>(Hash);
-    OpcodeHash = Bitfield::get<ValueOpcode>(Hash);
-    InstrHash = Bitfield::get<ValueInstr>(Hash);
-    PredHash = Bitfield::get<ValuePred>(Hash);
-    SuccHash = Bitfield::get<ValueSucc>(Hash);
-  }
-
-  /// Combine the blended hash into uint64_t.
-  uint64_t combine() const {
-    uint64_t Hash = 0;
-    Bitfield::set<ValueOffset>(Hash, Offset);
-    Bitfield::set<ValueOpcode>(Hash, OpcodeHash);
-    Bitfield::set<ValueInstr>(Hash, InstrHash);
-    Bitfield::set<ValuePred>(Hash, PredHash);
-    Bitfield::set<ValueSucc>(Hash, SuccHash);
-    return Hash;
-  }
-
-  /// Compute a distance between two given blended hashes. The smaller the
-  /// distance, the more similar two blocks are. For identical basic blocks,
-  /// the distance is zero.
-  uint64_t distance(const BlendedBlockHash &BBH) const {
-    assert(OpcodeHash == BBH.OpcodeHash &&
-           "incorrect blended hash distance computation");
-    uint64_t Dist = 0;
-    // Account for NeighborHash
-    Dist += SuccHash == BBH.SuccHash ? 0 : 1;
-    Dist += PredHash == BBH.PredHash ? 0 : 1;
-    Dist <<= 16;
-    // Account for InstrHash
-    Dist += InstrHash == BBH.InstrHash ? 0 : 1;
-    Dist <<= 16;
-    // Account for Offset
-    Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
-    return Dist;
-  }
-
-  /// The offset of the basic block from the function start.
-  uint16_t Offset{0};
-  /// (Loose) Hash of the basic block instructions, excluding operands.
-  uint16_t OpcodeHash{0};
-  /// (Strong) Hash of the basic block instructions, including opcodes and
-  /// operands.
-  uint16_t InstrHash{0};
-  /// (Loose) Hashes of the predecessors of the basic block.
-  uint8_t PredHash{0};
-  /// (Loose) Hashes of the successors of the basic block.
-  uint8_t SuccHash{0};
-};
-
 /// The object is used to identify and match basic blocks in a BinaryFunction
 /// given their hashes computed on a binary built from several revisions behind
 /// release.
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 6753337c24ea7..2cab5c385c3c9 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -458,58 +458,53 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
   struct FunctionHashes {
     uint64_t Hash{0};
     uint64_t AdjacentFunctionHash{0};
-    std::set<uint64_t> AdjacentFunctionHashesSet;
+    std::vector<uint64_t> AdjacentFunctionHashesSet;
   };
   size_t MatchedWithCallGraph = 0;
   std::vector<BinaryFunction *> BFs = BC.getAllBinaryFunctions();
-  std::unordered_map<const BinaryFunction *, FunctionHashes> BFToHashes(
-      BFs.size());
+  std::unordered_map<BinaryFunction *, FunctionHashes> BFToHashes(BFs.size());
 
   // Computes the loose hash, as in the opcode hash, of a binary function.
   auto ComputeBFLooseHash = [&](const BinaryFunction *BF) {
-    std::string FunctionHashStr = std::accumulate(
-        BF->begin(), BF->end(), std::string(""),
-        [&](std::string Accum, const BinaryBasicBlock &BB) {
-          std::string BlockHashStr = hashBlockLoose(BC, BB);
-          uint16_t OpcodeHash;
-          if (YamlBP.Header.HashFunction == HashFunction::StdHash)
-            OpcodeHash =
-                (uint16_t)hash_value(std::hash<std::string>{}(BlockHashStr));
-          else if (YamlBP.Header.HashFunction == HashFunction::XXH3)
-            OpcodeHash = (uint16_t)llvm::xxh3_64bits(BlockHashStr);
-          else
-            llvm_unreachable("Unsupported hash function");
-          return std::move(Accum) + std::to_string(OpcodeHash);
-        });
-
+    std::string FunctionHashStr;
+    for (const BinaryBasicBlock &BB : *BF) {
+      std::string BlockHashStr = hashBlockLoose(BC, BB);
+      uint16_t OpcodeHash;
+      if (YamlBP.Header.HashFunction == HashFunction::StdHash)
+        OpcodeHash =
+            (uint16_t)hash_value(std::hash<std::string>{}(BlockHashStr));
+      else if (YamlBP.Header.HashFunction == HashFunction::XXH3)
+        OpcodeHash = (uint16_t)llvm::xxh3_64bits(BlockHashStr);
+      else
+        llvm_unreachable("Unsupported hash function");
+      FunctionHashStr.append(std::to_string(OpcodeHash));
+    }
     return std::hash<std::string>{}(FunctionHashStr);
   };
 
   // Computes the loose hash of a function profile.
   auto ComputeYamlBFLooseHash =
       [&](const yaml::bolt::BinaryFunctionProfile &YamlBF) {
-        std::string HashStr = std::to_string(
-            YamlBF.Blocks.begin(), YamlBF.Blocks.end(), std::string(""),
-            [&](std::string Accum,
-                const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
-              BlendedBlockHash YamlHash(YamlBB.Hash);
-              return std::move(Accum) + std::to_string(YamlHash.OpcodeHash);
-            });
-
+        std::string HashStr;
+        for (auto &YamlBB : YamlBF.Blocks) {
+          BlendedBlockHash YamlHash(YamlBB.Hash);
+          HashStr.append(std::to_string(YamlHash.OpcodeHash));
+        }
         return std::hash<std::string>{}(HashStr);
       };
 
   // Maps binary functions to adjacent functions in the FCG.
-  for (const BinaryFunction *CallerBF : BFs) {
+  for (BinaryFunction *CallerBF : BFs) {
     for (const BinaryBasicBlock &BB : CallerBF->blocks()) {
-      for (const MCInst &Inst : BB) {
+      for (const MCInst &Instr : BB) {
         if (!BC.MIB->isCall(Instr))
           continue;
         const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
         if (!CallSymbol)
           continue;
-        BinaryData *BD =
-            BC.getBinaryDataByName(CallSymbol->getName()) if (!BD) continue;
+        BinaryData *BD = BC.getBinaryDataByName(CallSymbol->getName());
+        if (!BD)
+          continue;
         BinaryFunction *CalleeBF = BC.getFunctionForSymbol(BD->getSymbol());
         if (!CalleeBF)
           continue;
@@ -524,9 +519,9 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
           CallerBFIt->second = FunctionHashes();
           CallerBFIt->second.Hash = ComputeBFLooseHash(CallerBF);
         }
-        CalleeBFIt->second.AdjacentFunctionHashesSet.insert(
+        CalleeBFIt->second.AdjacentFunctionHashesSet.push_back(
             CallerBFIt->second.Hash);
-        CallerBFIt->second.AdjacentFunctionHashesSet.insert(
+        CallerBFIt->second.AdjacentFunctionHashesSet.push_back(
             CalleeBFIt->second.Hash);
       }
     }
@@ -534,13 +529,13 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
 
   // Using the constructed adjacent-function mapping, creates a mapping from
   // neighbor hash to BFs.
-  std::unordered_map<uint64_t, std::vector<const BinaryFunction *>>
-      NeighborHashToBFs;
-  for (const BinaryFunction *BF : BFs) {
+  std::unordered_map<uint64_t, std::vector<BinaryFunction *>> NeighborHashToBFs;
+  for (BinaryFunction *BF : BFs) {
     auto It = BFToHashes.find(BF);
     assert(It != BFToHashes.end() && "All BFs should be processed");
     FunctionHashes &FunctionHashes = It->second;
-
+    std::sort(FunctionHashes.AdjacentFunctionHashesSet.begin(),
+              FunctionHashes.AdjacentFunctionHashesSet.end());
     std::string AdjacentFunctionHashStr =
         std::accumulate(FunctionHashes.AdjacentFunctionHashesSet.begin(),
                         FunctionHashes.AdjacentFunctionHashesSet.end(),
@@ -554,38 +549,34 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
 
   // TODO: this mapping will be introduced in the matching functions with calls
   // as anchors PR.
-  DenseMap<uint32_t, const yaml::bolt::BinaryFunctionProfile * YamlBF>
-      IdToYAMLBF;
+  DenseMap<uint32_t, const yaml::bolt::BinaryFunctionProfile *> IdToYAMLBF;
 
   // Maps YAML functions to adjacent functions in the profile FCG.
-  std::unordered_map<const yaml::bolt::BinaryFunctionProfile * YamlBF,
-                     FunctionHashes>
+  std::unordered_map<const yaml::bolt::BinaryFunctionProfile *, FunctionHashes>
       YamlBFToHashes(BFs.size());
   for (const auto &CallerYamlBF : YamlBP.Functions) {
-    if (YamlBF.Used)
-      continue;
-    for (const auto &YamlBB : CallerYamlBF.BasicBlocks) {
-      for (const auto &CallSite : YamlBB.Callsites) {
+    for (const auto &YamlBB : CallerYamlBF.Blocks) {
+      for (const auto &CallSite : YamlBB.CallSites) {
         auto IdToYAMLBFIt = IdToYAMLBF.find(CallSite.DestId);
         if (IdToYAMLBFIt == IdToYAMLBF.end())
           continue;
 
-        auto CalleeBFIt = YamlBFToHashes.find(IdToYAMLBFIt->second);
-        if (CalleeBFIt == YamlBFToHashes.end()) {
-          CalleeBFIt->second = FunctionHashes();
-          CalleeBFIt->second.Hash =
+        auto CalleeYamlBFIt = YamlBFToHashes.find(IdToYAMLBFIt->second);
+        if (CalleeYamlBFIt == YamlBFToHashes.end()) {
+          CalleeYamlBFIt->second = FunctionHashes();
+          CalleeYamlBFIt->second.Hash =
               ComputeYamlBFLooseHash(*IdToYAMLBFIt->second);
         }
-        auto CallerBFIt = YamlBFToHashes.find(&CallerYamlBF);
-        if (CallerBFIt == YamlBFToHashes.end()) {
-          CallerBFIt->second = FunctionHashes();
-          CallerBFIt->second.Hash = ComputeYamlBFLooseHash(CallerBF);
+        auto CallerYamlBFIt = YamlBFToHashes.find(&CallerYamlBF);
+        if (CallerYamlBFIt == YamlBFToHashes.end()) {
+          CallerYamlBFIt->second = FunctionHashes();
+          CallerYamlBFIt->second.Hash = ComputeYamlBFLooseHash(CallerYamlBF);
         }
 
-        CalleeBFIt->second.AdjacentFunctionHashesSet.insert(
-            CallerBFIt->second.Hash);
-        CallerBFIt->second.AdjacentFunctionHashesSet.insert(
-            CalleeBFIt->second.Hash);
+        CalleeYamlBFIt->second.AdjacentFunctionHashesSet.push_back(
+            CallerYamlBFIt->second.Hash);
+        CallerYamlBFIt->second.AdjacentFunctionHashesSet.push_back(
+            CalleeYamlBFIt->second.Hash);
       }
     }
   }
@@ -599,7 +590,8 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
     assert(It != YamlBFToHashes.end() &&
            "All unused functions should be processed");
     FunctionHashes &FunctionHashes = It->second;
-
+    std::sort(FunctionHashes.AdjacentFunctionHashesSet.begin(),
+              FunctionHashes.AdjacentFunctionHashesSet.end());
     std::string AdjacentFunctionHashStr =
         std::accumulate(FunctionHashes.AdjacentFunctionHashesSet.begin(),
                         FunctionHashes.AdjacentFunctionHashesSet.end(),
@@ -614,7 +606,7 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
     if (NeighborHashToBFsIt == NeighborHashToBFs.end())
       continue;
 
-    for (const BinaryFunction *BF : NeighborHashToBFsIt->second) {
+    for (BinaryFunction *BF : NeighborHashToBFsIt->second) {
       if (!ProfiledFunctions.count(BF) && profileMatches(YamlBF, *BF)) {
         matchProfileToFunction(YamlBF, *BF);
         ++MatchedWithCallGraph;
@@ -668,7 +660,7 @@ size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
 
   // Maps namespaces to BFs excluding binary functions with no equal sized
   // profiled functions belonging to the same namespace.
-  / for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
+  for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
     std::string DemangledName = BF->getDemangledName();
     std::string Namespace = DeriveNameSpace(DemangledName);
 

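A small usage sketch for the BlendedBlockHash moved into the header by this patch (not part of the PR; it assumes the struct definition shown above plus <cassert>): the components round-trip through the packed uint64_t, distance() is zero for identical blocks, and an offset difference contributes only to the low 16 bits of the distance.

    #include <cassert>
    #include <cstdint>

    void blendedBlockHashExample() {
      BlendedBlockHash A;
      A.Offset = 0x10;
      A.OpcodeHash = 0xBEEF;
      A.InstrHash = 0x1234;
      A.PredHash = 0x7;
      A.SuccHash = 0x2;

      // Pack into a single uint64_t and unpack again: components round-trip.
      BlendedBlockHash B(A.combine());
      assert(A.distance(B) == 0); // identical blocks => distance 0

      // Same opcodes but a different layout offset: only the low 16-bit
      // offset term of the distance changes.
      B.Offset = 0x18;
      assert(A.distance(B) == 0x8);
    }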

