[llvm-branch-commits] [llvm] [BOLT] Match blocks with calls as anchors (PR #96596)

Shaw Young via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jul 2 11:52:27 PDT 2024


https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/96596

>From 05d59574d6260b98a469921eb2fccf5398bfafb6 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 24 Jun 2024 23:00:59 -0700
Subject: [PATCH 1/9] Added call to matchWithCallsAsAnchors

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index aafffac3d4b1c..1a0e5d239d252 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -479,6 +479,9 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF))
       matchProfileToFunction(YamlBF, *BF);
 
+  uint64_t MatchedWithCallsAsAnchors = 0;
+  matchWithCallsAsAnchors(BC,  MatchedWithCallsAsAnchors);
+
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
     if (!YamlBF.Used && opts::Verbosity >= 1)
       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name

>From 77ef0008f4f5987719555e6cc3e32da812ae0f31 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 24 Jun 2024 23:11:43 -0700
Subject: [PATCH 2/9] Changed CallHashToBF representation

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 1a0e5d239d252..91b01a99c7485 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -29,6 +29,10 @@ static llvm::cl::opt<bool>
                cl::desc("ignore hash while reading function profile"),
                cl::Hidden, cl::cat(BoltOptCategory));
 
+llvm::cl::opt<bool> MatchWithCallsAsAnchors("match-with-calls-as-anchors",
+                                  cl::desc("Matches with calls as anchors"),
+                                  cl::Hidden, cl::cat(BoltOptCategory));
+
 llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
                                   cl::desc("use DFS order for YAML profile"),
                                   cl::Hidden, cl::cat(BoltOptCategory));
@@ -353,7 +357,7 @@ void YAMLProfileReader::matchWithCallsAsAnchors(
     llvm_unreachable("Unhandled HashFunction");
   };
 
-  std::unordered_map<uint64_t, BinaryFunction &> CallHashToBF;
+  std::unordered_map<uint64_t, BinaryFunction *> CallHashToBF;
 
   for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
     if (ProfiledFunctions.count(BF))
@@ -375,12 +379,12 @@ void YAMLProfileReader::matchWithCallsAsAnchors(
       for (const std::string &FunctionName : FunctionNames)
         HashString.append(FunctionName);
     }
-    CallHashToBF.emplace(ComputeCallHash(HashString), BF);
+    CallHashToBF[ComputeCallHash(HashString)] = BF;
   }
 
   std::unordered_map<uint32_t, std::string> ProfiledFunctionIdToName;
 
-  for (const yaml::bolt::BinaryFunctionProfile YamlBF : YamlBP.Functions)
+  for (const yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
     ProfiledFunctionIdToName[YamlBF.Id] = YamlBF.Name;
 
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
@@ -401,7 +405,7 @@ void YAMLProfileReader::matchWithCallsAsAnchors(
     auto It = CallHashToBF.find(Hash);
     if (It == CallHashToBF.end())
       continue;
-    matchProfileToFunction(YamlBF, It->second);
+    matchProfileToFunction(YamlBF, *It->second);
     ++MatchedWithCallsAsAnchors;
   }
 }
@@ -480,7 +484,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       matchProfileToFunction(YamlBF, *BF);
 
   uint64_t MatchedWithCallsAsAnchors = 0;
-  matchWithCallsAsAnchors(BC,  MatchedWithCallsAsAnchors);
+  if (opts::MatchWithCallsAsAnchors)
+    matchWithCallsAsAnchors(BC,  MatchedWithCallsAsAnchors);
 
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
     if (!YamlBF.Used && opts::Verbosity >= 1)

>From ea7cb68ab9e8e158412c2e752986968968a60d93 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 25 Jun 2024 09:28:39 -0700
Subject: [PATCH 3/9] Changed BF called FunctionNames to multiset

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 91b01a99c7485..3b3d73f7af023 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -365,7 +365,7 @@ void YAMLProfileReader::matchWithCallsAsAnchors(
 
     std::string HashString;
     for (const auto &BB : BF->blocks()) {
-      std::set<std::string> FunctionNames;
+      std::multiset<std::string> FunctionNames;
       for (const MCInst &Instr : BB) {
         // Skip non-call instructions.
         if (!BC.MIB->isCall(Instr))
@@ -397,9 +397,8 @@ void YAMLProfileReader::matchWithCallsAsAnchors(
         std::string &FunctionName = ProfiledFunctionIdToName[CallSite.DestId];
         FunctionNames.insert(FunctionName);
       }
-      for (const std::string &FunctionName : FunctionNames) {
+      for (const std::string &FunctionName : FunctionNames)
         HashString.append(FunctionName);
-      }
     }
     size_t Hash = ComputeCallHash(HashString);
     auto It = CallHashToBF.find(Hash);

>From c435034970c831c75ab718a9cb82c77d3004e091 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 25 Jun 2024 11:19:47 -0700
Subject: [PATCH 4/9] GetOneName for YamlBF

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 38 +++++++++++++++++++-------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 3b3d73f7af023..2ea1a051d314c 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -29,9 +29,10 @@ static llvm::cl::opt<bool>
                cl::desc("ignore hash while reading function profile"),
                cl::Hidden, cl::cat(BoltOptCategory));
 
-llvm::cl::opt<bool> MatchWithCallsAsAnchors("match-with-calls-as-anchors",
-                                  cl::desc("Matches with calls as anchors"),
-                                  cl::Hidden, cl::cat(BoltOptCategory));
+llvm::cl::opt<bool>
+    MatchWithCallsAsAnchors("match-with-calls-as-anchors",
+                            cl::desc("Matches with calls as anchors"),
+                            cl::Hidden, cl::cat(BoltOptCategory));
 
 llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
                                   cl::desc("use DFS order for YAML profile"),
@@ -358,12 +359,11 @@ void YAMLProfileReader::matchWithCallsAsAnchors(
   };
 
   std::unordered_map<uint64_t, BinaryFunction *> CallHashToBF;
-
   for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
     if (ProfiledFunctions.count(BF))
       continue;
 
-    std::string HashString;
+    std::string HashString{""};
     for (const auto &BB : BF->blocks()) {
       std::multiset<std::string> FunctionNames;
       for (const MCInst &Instr : BB) {
@@ -379,14 +379,23 @@ void YAMLProfileReader::matchWithCallsAsAnchors(
       for (const std::string &FunctionName : FunctionNames)
         HashString.append(FunctionName);
     }
+    // its possible we have some collisions.. our options to solve include
+    // cmp block ct, or potentially fname edit distance. although, thats p exp
+    if (HashString == "")
+      continue;
     CallHashToBF[ComputeCallHash(HashString)] = BF;
   }
 
   std::unordered_map<uint32_t, std::string> ProfiledFunctionIdToName;
-
-  for (const yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
-    ProfiledFunctionIdToName[YamlBF.Id] = YamlBF.Name;
-
+  for (const yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
+    // How do we handle functions with multiple names? LTO specifically,
+    // right now the scope of this is to identical names wo lto i think
+    StringRef Name = YamlBF.Name;
+    const size_t Pos = Name.find("(*");
+    if (Pos != StringRef::npos)
+      Name = Name.substr(0, Pos);
+    ProfiledFunctionIdToName[YamlBF.Id] = Name;
+  }
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
     if (YamlBF.Used)
       continue;
@@ -400,10 +409,14 @@ void YAMLProfileReader::matchWithCallsAsAnchors(
       for (const std::string &FunctionName : FunctionNames)
         HashString.append(FunctionName);
     }
+    if (HashString == "")
+      continue;
     size_t Hash = ComputeCallHash(HashString);
     auto It = CallHashToBF.find(Hash);
     if (It == CallHashToBF.end())
       continue;
+    if (ProfiledFunctions.count(It->second))
+      continue;
     matchProfileToFunction(YamlBF, *It->second);
     ++MatchedWithCallsAsAnchors;
   }
@@ -484,13 +497,18 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   uint64_t MatchedWithCallsAsAnchors = 0;
   if (opts::MatchWithCallsAsAnchors)
-    matchWithCallsAsAnchors(BC,  MatchedWithCallsAsAnchors);
+    matchWithCallsAsAnchors(BC, MatchedWithCallsAsAnchors);
 
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
     if (!YamlBF.Used && opts::Verbosity >= 1)
       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
              << '\n';
 
+  if (opts::Verbosity >= 2) {
+    outs() << "BOLT-INFO: matched " << MatchedWithCallsAsAnchors
+           << " functions with calls as anchors\n";
+  }
+
   // Set for parseFunctionProfile().
   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
   NormalizeByCalls = usesEvent("branches");

>From 42363169b83a0ca7101187670b20d0e2167f2fbe Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 27 Jun 2024 16:20:47 -0700
Subject: [PATCH 5/9] Moved to StaleProfileMatching

Created using spr 1.3.4
---
 bolt/include/bolt/Core/HashUtilities.h        |  2 +
 bolt/include/bolt/Profile/YAMLProfileReader.h |  4 -
 bolt/lib/Core/HashUtilities.cpp               | 25 ++++++
 bolt/lib/Profile/StaleProfileMatching.cpp     | 51 ++++++++---
 bolt/lib/Profile/YAMLProfileReader.cpp        | 89 -------------------
 5 files changed, 65 insertions(+), 106 deletions(-)

diff --git a/bolt/include/bolt/Core/HashUtilities.h b/bolt/include/bolt/Core/HashUtilities.h
index 53ea110aa683b..c86cd906b3f34 100644
--- a/bolt/include/bolt/Core/HashUtilities.h
+++ b/bolt/include/bolt/Core/HashUtilities.h
@@ -35,6 +35,8 @@ std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
 
 std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB);
 
+std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB);
+
 } // namespace bolt
 } // namespace llvm
 
diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h
index 572e8a66c5b50..7a8aa176c30f1 100644
--- a/bolt/include/bolt/Profile/YAMLProfileReader.h
+++ b/bolt/include/bolt/Profile/YAMLProfileReader.h
@@ -73,10 +73,6 @@ class YAMLProfileReader : public ProfileReaderBase {
   bool parseFunctionProfile(BinaryFunction &Function,
                             const yaml::bolt::BinaryFunctionProfile &YamlBF);
 
-  /// Match profiles to functions using calls as anchors.
-  void matchWithCallsAsAnchors(BinaryContext &BC,
-                               uint64_t &MatchedWithCallsAsAnchors);
-
   /// Infer function profile from stale data (collected on older binaries).
   bool inferStaleProfile(BinaryFunction &Function,
                          const yaml::bolt::BinaryFunctionProfile &YamlBF);
diff --git a/bolt/lib/Core/HashUtilities.cpp b/bolt/lib/Core/HashUtilities.cpp
index c4c67bd68198b..7d72dbe8928ab 100644
--- a/bolt/lib/Core/HashUtilities.cpp
+++ b/bolt/lib/Core/HashUtilities.cpp
@@ -155,5 +155,30 @@ std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
   return HashString;
 }
 
+/// An even looser  hash of a basic block to use with the stale profile
+/// matching. Hashing block function calls allows for broader relational
+/// matching, predicated on the assumption that called functions remain similar
+/// across revisions.
+std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) {
+  // The hash is computed by creating a string of all lexicographically ordered
+  // called function names.
+  std::multiset<std::string> FunctionNames;
+  for (const MCInst &Instr : BB) {
+    // Skip non-call instructions.
+    if (!BC.MIB->isCall(Instr))
+      continue;
+    const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
+    if (!CallSymbol)
+      continue;
+    FunctionNames.insert(std::string(CallSymbol->getName()));
+  }
+
+  std::string HashString;
+  for (const std::string &FunctionName : FunctionNames)
+    HashString.append(FunctionName);
+
+  return HashString;
+}
+
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 365bc5389266d..5b318e6793069 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -152,8 +152,8 @@ struct BlendedBlockHash {
   /// distance, the more similar two blocks are. For identical basic blocks,
   /// the distance is zero.
   uint64_t distance(const BlendedBlockHash &BBH) const {
-    assert(OpcodeHash == BBH.OpcodeHash &&
-           "incorrect blended hash distance computation");
+    // assert(OpcodeHash == BBH.OpcodeHash &&
+    //        "incorrect blended hash distance computation");
     uint64_t Dist = 0;
     // Account for NeighborHash
     Dist += SuccHash == BBH.SuccHash ? 0 : 1;
@@ -187,20 +187,22 @@ class StaleMatcher {
 public:
   /// Initialize stale matcher.
   void init(const std::vector<FlowBlock *> &Blocks,
-            const std::vector<BlendedBlockHash> &Hashes) {
+            const std::vector<BlendedBlockHash> &Hashes,
+            const std::vector<uint64_t> &CallHashes) {
     assert(Blocks.size() == Hashes.size() &&
            "incorrect matcher initialization");
     for (size_t I = 0; I < Blocks.size(); I++) {
       FlowBlock *Block = Blocks[I];
-      uint16_t OpHash = Hashes[I].OpcodeHash;
-      OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block));
+      uint16_t CallHash = CallHashes[I];
+      CallHashToBlocks[CallHash].push_back(std::make_pair(Hashes[I], Block));
     }
   }
 
   /// Find the most similar block for a given hash.
-  const FlowBlock *matchBlock(BlendedBlockHash BlendedHash) const {
-    auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
-    if (BlockIt == OpHashToBlocks.end())
+  const FlowBlock *matchBlock(uint64_t CallHash,
+                              BlendedBlockHash BlendedHash) const {
+    auto BlockIt = CallHashToBlocks.find(CallHash);
+    if (BlockIt == CallHashToBlocks.end())
       return nullptr;
     FlowBlock *BestBlock = nullptr;
     uint64_t BestDist = std::numeric_limits<uint64_t>::max();
@@ -222,9 +224,11 @@ class StaleMatcher {
     return Hash1.InstrHash == Hash2.InstrHash;
   }
 
+  // TODO: add "mid confidence match?" for loose hash matching
+
 private:
   using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
-  std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
+  std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
 };
 
 void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
@@ -391,17 +395,38 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 /// of the basic blocks in the binary, the count is "matched" to the block.
 /// Similarly, if both the source and the target of a count in the profile are
 /// matched to a jump in the binary, the count is recorded in CFG.
-void matchWeightsByHashes(BinaryContext &BC,
+void matchWeightsByHashes(HashFunction HashFunction,
+                          BinaryContext &BC,
                           const BinaryFunction::BasicBlockOrderType &BlockOrder,
                           const yaml::bolt::BinaryFunctionProfile &YamlBF,
                           FlowFunction &Func) {
+  // TODO: perhaps in the interest of polish we move blended hashes, callhashes,
+  // and blocks into an "intialize stale matcher" function and use it here
+  // or in inferStaleProfile
+
   assert(Func.Blocks.size() == BlockOrder.size() + 1);
 
+  std::vector<uint64_t> CallHashes;
   std::vector<FlowBlock *> Blocks;
   std::vector<BlendedBlockHash> BlendedHashes;
+
+  CallHashes.resize(BlockOrder.size());
+  Blocks.resize(BlockOrder.size());
+  BlendedHashes.resize(BlockOrder.size());
+
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
     const BinaryBasicBlock *BB = BlockOrder[I];
     assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
+
+    std::string CallHashStr = hashBlockCalls(BC, *BB);
+    if (HashFunction == HashFunction::StdHash) {
+      CallHashes.push_back(std::hash<std::string>{}(CallHashStr));
+    } else if (HashFunction == HashFunction::XXH3) {
+      CallHashes.push_back(llvm::xxh3_64bits(CallHashStr));
+    } else {
+      llvm_unreachable("Unhandled HashFunction");
+    }
+
     Blocks.push_back(&Func.Blocks[I + 1]);
     BlendedBlockHash BlendedHash(BB->getHash());
     BlendedHashes.push_back(BlendedHash);
@@ -409,7 +434,7 @@ void matchWeightsByHashes(BinaryContext &BC,
                       << Twine::utohexstr(BB->getHash()) << "\n");
   }
   StaleMatcher Matcher;
-  Matcher.init(Blocks, BlendedHashes);
+  Matcher.init(Blocks, BlendedHashes, CallHashes);
 
   // Index in yaml profile => corresponding (matched) block
   DenseMap<uint64_t, const FlowBlock *> MatchedBlocks;
@@ -417,7 +442,7 @@ void matchWeightsByHashes(BinaryContext &BC,
   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
     assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
     BlendedBlockHash YamlHash(YamlBB.Hash);
-    const FlowBlock *MatchedBlock = Matcher.matchBlock(YamlHash);
+    const FlowBlock *MatchedBlock = Matcher.matchBlock(CallHashes[YamlBB.Index], YamlHash);
     // Always match the entry block.
     if (MatchedBlock == nullptr && YamlBB.Index == 0)
       MatchedBlock = Blocks[0];
@@ -729,7 +754,7 @@ bool YAMLProfileReader::inferStaleProfile(
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
+  matchWeightsByHashes(YamlBP.Header.HashFunction, BF.getBinaryContext(), BlockOrder, YamlBF, Func);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2ea1a051d314c..f25f59201f1cd 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -14,7 +14,6 @@
 #include "bolt/Utils/Utils.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/xxhash.h"
 
 using namespace llvm;
 
@@ -29,11 +28,6 @@ static llvm::cl::opt<bool>
                cl::desc("ignore hash while reading function profile"),
                cl::Hidden, cl::cat(BoltOptCategory));
 
-llvm::cl::opt<bool>
-    MatchWithCallsAsAnchors("match-with-calls-as-anchors",
-                            cl::desc("Matches with calls as anchors"),
-                            cl::Hidden, cl::cat(BoltOptCategory));
-
 llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
                                   cl::desc("use DFS order for YAML profile"),
                                   cl::Hidden, cl::cat(BoltOptCategory));
@@ -348,80 +342,6 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
   return false;
 }
 
-void YAMLProfileReader::matchWithCallsAsAnchors(
-    BinaryContext &BC, uint64_t &MatchedWithCallsAsAnchors) {
-  auto ComputeCallHash = [&](std::string HashString) {
-    if (YamlBP.Header.HashFunction == HashFunction::StdHash)
-      return std::hash<std::string>{}(HashString);
-    if (YamlBP.Header.HashFunction == HashFunction::XXH3)
-      return llvm::xxh3_64bits(HashString);
-    llvm_unreachable("Unhandled HashFunction");
-  };
-
-  std::unordered_map<uint64_t, BinaryFunction *> CallHashToBF;
-  for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
-    if (ProfiledFunctions.count(BF))
-      continue;
-
-    std::string HashString{""};
-    for (const auto &BB : BF->blocks()) {
-      std::multiset<std::string> FunctionNames;
-      for (const MCInst &Instr : BB) {
-        // Skip non-call instructions.
-        if (!BC.MIB->isCall(Instr))
-          continue;
-        const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
-        if (!CallSymbol)
-          continue;
-        FunctionNames.insert(std::string(CallSymbol->getName()));
-      }
-      // Adds called functions to the hash string in lexigraphic order.
-      for (const std::string &FunctionName : FunctionNames)
-        HashString.append(FunctionName);
-    }
-    // its possible we have some collisions.. our options to solve include
-    // cmp block ct, or potentially fname edit distance. although, thats p exp
-    if (HashString == "")
-      continue;
-    CallHashToBF[ComputeCallHash(HashString)] = BF;
-  }
-
-  std::unordered_map<uint32_t, std::string> ProfiledFunctionIdToName;
-  for (const yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
-    // How do we handle functions with multiple names? LTO specifically,
-    // right now the scope of this is to identical names wo lto i think
-    StringRef Name = YamlBF.Name;
-    const size_t Pos = Name.find("(*");
-    if (Pos != StringRef::npos)
-      Name = Name.substr(0, Pos);
-    ProfiledFunctionIdToName[YamlBF.Id] = Name;
-  }
-  for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
-    if (YamlBF.Used)
-      continue;
-    std::string HashString{""};
-    for (const yaml::bolt::BinaryBasicBlockProfile &Block : YamlBF.Blocks) {
-      std::multiset<std::string> FunctionNames;
-      for (const yaml::bolt::CallSiteInfo &CallSite : Block.CallSites) {
-        std::string &FunctionName = ProfiledFunctionIdToName[CallSite.DestId];
-        FunctionNames.insert(FunctionName);
-      }
-      for (const std::string &FunctionName : FunctionNames)
-        HashString.append(FunctionName);
-    }
-    if (HashString == "")
-      continue;
-    size_t Hash = ComputeCallHash(HashString);
-    auto It = CallHashToBF.find(Hash);
-    if (It == CallHashToBF.end())
-      continue;
-    if (ProfiledFunctions.count(It->second))
-      continue;
-    matchProfileToFunction(YamlBF, *It->second);
-    ++MatchedWithCallsAsAnchors;
-  }
-}
-
 Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   if (opts::Verbosity >= 1) {
     outs() << "BOLT-INFO: YAML profile with hash: ";
@@ -495,20 +415,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF))
       matchProfileToFunction(YamlBF, *BF);
 
-  uint64_t MatchedWithCallsAsAnchors = 0;
-  if (opts::MatchWithCallsAsAnchors)
-    matchWithCallsAsAnchors(BC, MatchedWithCallsAsAnchors);
-
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
     if (!YamlBF.Used && opts::Verbosity >= 1)
       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
              << '\n';
 
-  if (opts::Verbosity >= 2) {
-    outs() << "BOLT-INFO: matched " << MatchedWithCallsAsAnchors
-           << " functions with calls as anchors\n";
-  }
-
   // Set for parseFunctionProfile().
   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
   NormalizeByCalls = usesEvent("branches");

>From 5f83647ae12a2165e18415cb213bec341fc319f2 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 1 Jul 2024 11:42:46 -0700
Subject: [PATCH 6/9] Added CallHash check following OpHash check in block
 matchgin

Created using spr 1.3.4
---
 bolt/include/bolt/Core/HashUtilities.h        |   6 +
 .../include/bolt/Profile/ProfileYAMLMapping.h |   1 +
 bolt/lib/Core/HashUtilities.cpp               |  26 ++++
 bolt/lib/Profile/StaleProfileMatching.cpp     | 112 ++++++++++++------
 bolt/lib/Profile/YAMLProfileReader.cpp        |   4 +
 5 files changed, 113 insertions(+), 36 deletions(-)

diff --git a/bolt/include/bolt/Core/HashUtilities.h b/bolt/include/bolt/Core/HashUtilities.h
index c86cd906b3f34..c84bbad0ba29b 100644
--- a/bolt/include/bolt/Core/HashUtilities.h
+++ b/bolt/include/bolt/Core/HashUtilities.h
@@ -16,6 +16,7 @@
 
 #include "bolt/Core/BinaryBasicBlock.h"
 #include "bolt/Core/BinaryContext.h"
+#include "bolt/Profile/ProfileYAMLMapping.h"
 
 namespace llvm {
 namespace bolt {
@@ -37,6 +38,11 @@ std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB);
 
 std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB);
 
+std::string hashBlockCalls(
+    const std::unordered_map<uint32_t, yaml::bolt::BinaryFunctionProfile *>
+        &IdsToProfiledFunctions,
+    const yaml::bolt::BinaryBasicBlockProfile &YamlBB);
+
 } // namespace bolt
 } // namespace llvm
 
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index 9dd3920dbf094..35a195700a0d5 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -225,6 +225,7 @@ namespace bolt {
 struct BinaryProfile {
   BinaryProfileHeader Header;
   std::vector<BinaryFunctionProfile> Functions;
+  std::unordered_map<uint32_t, BinaryFunctionProfile *> IdToFunctionProfile;
 };
 } // namespace bolt
 
diff --git a/bolt/lib/Core/HashUtilities.cpp b/bolt/lib/Core/HashUtilities.cpp
index 7d72dbe8928ab..601c597126ec7 100644
--- a/bolt/lib/Core/HashUtilities.cpp
+++ b/bolt/lib/Core/HashUtilities.cpp
@@ -180,5 +180,31 @@ std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) {
   return HashString;
 }
 
+/// The same as the above function, but for profiled functions.
+std::string hashBlockCalls(
+    const std::unordered_map<uint32_t, yaml::bolt::BinaryFunctionProfile *>
+        &IdsToProfiledFunctions,
+    const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
+
+  std::multiset<std::string> FunctionNames;
+  for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) {
+    auto It = IdsToProfiledFunctions.find(CallSiteInfo.DestId);
+    assert(It != IdsToProfiledFunctions.end() &&
+           "All profiled functions should have ids");
+    StringRef Name = It->second->Name;
+    const size_t Pos = Name.find("(*");
+    if (Pos != StringRef::npos)
+      Name = Name.substr(0, Pos);
+    FunctionNames.insert(std::string(Name));
+  }
+
+  std::string HashString;
+  for (const std::string &FunctionName : FunctionNames)
+    HashString.append(FunctionName);
+
+  return HashString;
+}
+//
+
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 5b318e6793069..cec7ced707ba0 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -152,8 +152,8 @@ struct BlendedBlockHash {
   /// distance, the more similar two blocks are. For identical basic blocks,
   /// the distance is zero.
   uint64_t distance(const BlendedBlockHash &BBH) const {
-    // assert(OpcodeHash == BBH.OpcodeHash &&
-    //        "incorrect blended hash distance computation");
+    assert(OpcodeHash == BBH.OpcodeHash &&
+           "incorrect blended hash distance computation");
     uint64_t Dist = 0;
     // Account for NeighborHash
     Dist += SuccHash == BBH.SuccHash ? 0 : 1;
@@ -188,21 +188,43 @@ class StaleMatcher {
   /// Initialize stale matcher.
   void init(const std::vector<FlowBlock *> &Blocks,
             const std::vector<BlendedBlockHash> &Hashes,
-            const std::vector<uint64_t> &CallHashes) {
+            const std::vector<std::unique_ptr<uint64_t>> &CallHashes) {
     assert(Blocks.size() == Hashes.size() &&
+           Hashes.size() == CallHashes.size() &&
            "incorrect matcher initialization");
     for (size_t I = 0; I < Blocks.size(); I++) {
       FlowBlock *Block = Blocks[I];
-      uint16_t CallHash = CallHashes[I];
-      CallHashToBlocks[CallHash].push_back(std::make_pair(Hashes[I], Block));
+      uint16_t OpHash = Hashes[I].OpcodeHash;
+      OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block));
+      if (CallHashes[I].get())
+        CallHashToBlocks[*CallHashes[I]].push_back(
+            std::make_pair(Hashes[I], Block));
     }
   }
 
   /// Find the most similar block for a given hash.
-  const FlowBlock *matchBlock(uint64_t CallHash,
-                              BlendedBlockHash BlendedHash) const {
-    auto BlockIt = CallHashToBlocks.find(CallHash);
-    if (BlockIt == CallHashToBlocks.end())
+  const FlowBlock *matchBlock(BlendedBlockHash &BlendedHash,
+                              std::unique_ptr<uint64_t> &CallHash) const {
+    const FlowBlock *BestBlock = matchWithOpcodes(BlendedHash);
+    return BestBlock ? BestBlock : matchWithCalls(BlendedHash, CallHash);
+  }
+
+  /// Returns true if the two basic blocks (in the binary and in the profile)
+  /// corresponding to the given hashes are matched to each other with a high
+  /// confidence.
+  static bool isHighConfidenceMatch(BlendedBlockHash Hash1,
+                                    BlendedBlockHash Hash2) {
+    return Hash1.InstrHash == Hash2.InstrHash;
+  }
+
+private:
+  using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
+  std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
+  std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
+
+  const FlowBlock *matchWithOpcodes(BlendedBlockHash &BlendedHash) const {
+    auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
+    if (BlockIt == OpHashToBlocks.end())
       return nullptr;
     FlowBlock *BestBlock = nullptr;
     uint64_t BestDist = std::numeric_limits<uint64_t>::max();
@@ -216,19 +238,24 @@ class StaleMatcher {
     return BestBlock;
   }
 
-  /// Returns true if the two basic blocks (in the binary and in the profile)
-  /// corresponding to the given hashes are matched to each other with a high
-  /// confidence.
-  static bool isHighConfidenceMatch(BlendedBlockHash Hash1,
-                                    BlendedBlockHash Hash2) {
-    return Hash1.InstrHash == Hash2.InstrHash;
+  const FlowBlock *matchWithCalls(BlendedBlockHash &BlendedHash,
+                                  std::unique_ptr<uint64_t> &CallHash) const {
+    if (!CallHash.get())
+      return nullptr;
+    auto BlockIt = CallHashToBlocks.find(*CallHash);
+    if (BlockIt == CallHashToBlocks.end())
+      return nullptr;
+    FlowBlock *BestBlock = nullptr;
+    uint64_t BestDist = std::numeric_limits<uint64_t>::max();
+    for (const auto &[Hash, Block] : BlockIt->second) {
+      uint64_t Dist = Hash.distance(BlendedHash);
+      if (BestBlock == nullptr || Dist < BestDist) {
+        BestDist = Dist;
+        BestBlock = Block;
+      }
+    }
+    return BestBlock;
   }
-
-  // TODO: add "mid confidence match?" for loose hash matching
-
-private:
-  using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
-  std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
 };
 
 void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
@@ -395,34 +422,33 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 /// of the basic blocks in the binary, the count is "matched" to the block.
 /// Similarly, if both the source and the target of a count in the profile are
 /// matched to a jump in the binary, the count is recorded in CFG.
-void matchWeightsByHashes(HashFunction HashFunction,
-                          BinaryContext &BC,
+void matchWeightsByHashes(BinaryContext &BC,
                           const BinaryFunction::BasicBlockOrderType &BlockOrder,
+                          const yaml::bolt::BinaryProfile &YamlBP,
                           const yaml::bolt::BinaryFunctionProfile &YamlBF,
                           FlowFunction &Func) {
-  // TODO: perhaps in the interest of polish we move blended hashes, callhashes,
-  // and blocks into an "intialize stale matcher" function and use it here
-  // or in inferStaleProfile
 
   assert(Func.Blocks.size() == BlockOrder.size() + 1);
 
-  std::vector<uint64_t> CallHashes;
+  std::vector<std::unique_ptr<uint64_t>> CallHashes;
   std::vector<FlowBlock *> Blocks;
   std::vector<BlendedBlockHash> BlendedHashes;
 
-  CallHashes.resize(BlockOrder.size());
-  Blocks.resize(BlockOrder.size());
-  BlendedHashes.resize(BlockOrder.size());
+  HashFunction HashFunction = YamlBP.Header.HashFunction;
 
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
     const BinaryBasicBlock *BB = BlockOrder[I];
     assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
 
     std::string CallHashStr = hashBlockCalls(BC, *BB);
-    if (HashFunction == HashFunction::StdHash) {
-      CallHashes.push_back(std::hash<std::string>{}(CallHashStr));
+    if (CallHashStr.empty()) {
+      CallHashes.emplace_back(nullptr);
+    } else if (HashFunction == HashFunction::StdHash) {
+      CallHashes.emplace_back(
+          std::make_unique<uint64_t>(std::hash<std::string>{}(CallHashStr)));
     } else if (HashFunction == HashFunction::XXH3) {
-      CallHashes.push_back(llvm::xxh3_64bits(CallHashStr));
+      CallHashes.emplace_back(
+          std::make_unique<uint64_t>(llvm::xxh3_64bits(CallHashStr)));
     } else {
       llvm_unreachable("Unhandled HashFunction");
     }
@@ -433,6 +459,7 @@ void matchWeightsByHashes(HashFunction HashFunction,
     LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
                       << Twine::utohexstr(BB->getHash()) << "\n");
   }
+
   StaleMatcher Matcher;
   Matcher.init(Blocks, BlendedHashes, CallHashes);
 
@@ -442,8 +469,21 @@ void matchWeightsByHashes(HashFunction HashFunction,
   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
     assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
     BlendedBlockHash YamlHash(YamlBB.Hash);
-    const FlowBlock *MatchedBlock = Matcher.matchBlock(CallHashes[YamlBB.Index], YamlHash);
-    // Always match the entry block.
+
+    const FlowBlock *MatchedBlock = nullptr;
+    std::string CallHashStr =
+        hashBlockCalls(YamlBP.IdToFunctionProfile, YamlBB);
+    std::unique_ptr<uint64_t> CallHash = nullptr;
+    if (HashFunction == HashFunction::StdHash) {
+      CallHash =
+          std::make_unique<uint64_t>(std::hash<std::string>{}(CallHashStr));
+    } else if (HashFunction == HashFunction::XXH3) {
+      CallHash = std::make_unique<uint64_t>(llvm::xxh3_64bits(CallHashStr));
+    } else {
+      llvm_unreachable("Unhandled HashFunction");
+    }
+
+    MatchedBlock = Matcher.matchBlock(YamlHash, CallHash);
     if (MatchedBlock == nullptr && YamlBB.Index == 0)
       MatchedBlock = Blocks[0];
     if (MatchedBlock != nullptr) {
@@ -754,7 +794,7 @@ bool YAMLProfileReader::inferStaleProfile(
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(YamlBP.Header.HashFunction, BF.getBinaryContext(), BlockOrder, YamlBF, Func);
+  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBP, YamlBF, Func);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index f25f59201f1cd..085596daebaed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -325,6 +325,10 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
     }
   }
 
+  // Map profiled function ids to names.
+  for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
+    YamlBP.IdToFunctionProfile[YamlBF.Id] = &YamlBF;
+
   return Error::success();
 }
 

>From 0c542e2ebe51bedaba20dc24df99842c590d3f87 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 1 Jul 2024 14:52:27 -0700
Subject: [PATCH 7/9] Changed IdToProfiles to IdToNames, Moved IdToNames out of
 YamlBP, changed callhash representation

Created using spr 1.3.4
---
 bolt/include/bolt/Core/HashUtilities.h        |  7 ++-
 .../include/bolt/Profile/ProfileYAMLMapping.h |  1 -
 bolt/include/bolt/Profile/YAMLProfileReader.h | 12 +++--
 bolt/lib/Core/HashUtilities.cpp               | 15 +++---
 bolt/lib/Profile/StaleProfileMatching.cpp     | 54 +++++++++----------
 bolt/lib/Profile/YAMLProfileReader.cpp        | 15 +++---
 6 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/bolt/include/bolt/Core/HashUtilities.h b/bolt/include/bolt/Core/HashUtilities.h
index c84bbad0ba29b..ff7af9e15ff14 100644
--- a/bolt/include/bolt/Core/HashUtilities.h
+++ b/bolt/include/bolt/Core/HashUtilities.h
@@ -38,10 +38,9 @@ std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB);
 
 std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB);
 
-std::string hashBlockCalls(
-    const std::unordered_map<uint32_t, yaml::bolt::BinaryFunctionProfile *>
-        &IdsToProfiledFunctions,
-    const yaml::bolt::BinaryBasicBlockProfile &YamlBB);
+std::string
+hashBlockCalls(const DenseMap<uint32_t, std::string *> &IdsToProfiledFunctions,
+               const yaml::bolt::BinaryBasicBlockProfile &YamlBB);
 
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index 35a195700a0d5..9dd3920dbf094 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -225,7 +225,6 @@ namespace bolt {
 struct BinaryProfile {
   BinaryProfileHeader Header;
   std::vector<BinaryFunctionProfile> Functions;
-  std::unordered_map<uint32_t, BinaryFunctionProfile *> IdToFunctionProfile;
 };
 } // namespace bolt
 
diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h
index 7a8aa176c30f1..34df64bd573df 100644
--- a/bolt/include/bolt/Profile/YAMLProfileReader.h
+++ b/bolt/include/bolt/Profile/YAMLProfileReader.h
@@ -70,12 +70,16 @@ class YAMLProfileReader : public ProfileReaderBase {
   std::vector<BinaryFunction *> ProfileBFs;
 
   /// Populate \p Function profile with the one supplied in YAML format.
-  bool parseFunctionProfile(BinaryFunction &Function,
-                            const yaml::bolt::BinaryFunctionProfile &YamlBF);
+  bool parseFunctionProfile(
+      const DenseMap<uint32_t, std::string *> &IdToFunctionName,
+      BinaryFunction &Function,
+      const yaml::bolt::BinaryFunctionProfile &YamlBF);
 
   /// Infer function profile from stale data (collected on older binaries).
-  bool inferStaleProfile(BinaryFunction &Function,
-                         const yaml::bolt::BinaryFunctionProfile &YamlBF);
+  bool
+  inferStaleProfile(const DenseMap<uint32_t, std::string *> IdToFunctionName,
+                    BinaryFunction &Function,
+                    const yaml::bolt::BinaryFunctionProfile &YamlBF);
 
   /// Initialize maps for profile matching.
   void buildNameMaps(BinaryContext &BC);
diff --git a/bolt/lib/Core/HashUtilities.cpp b/bolt/lib/Core/HashUtilities.cpp
index 601c597126ec7..6a4b9b9e3a9fb 100644
--- a/bolt/lib/Core/HashUtilities.cpp
+++ b/bolt/lib/Core/HashUtilities.cpp
@@ -155,10 +155,8 @@ std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
   return HashString;
 }
 
-/// An even looser  hash of a basic block to use with the stale profile
-/// matching. Hashing block function calls allows for broader relational
-/// matching, predicated on the assumption that called functions remain similar
-/// across revisions.
+/// An even looser hash of a basic block to use with stale profile matching,
+/// composed of the names of a block's called functions in lexicographic order.
 std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) {
   // The hash is computed by creating a string of all lexicographically ordered
   // called function names.
@@ -181,17 +179,16 @@ std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) {
 }
 
 /// The same as the above function, but for profiled functions.
-std::string hashBlockCalls(
-    const std::unordered_map<uint32_t, yaml::bolt::BinaryFunctionProfile *>
-        &IdsToProfiledFunctions,
-    const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
+std::string
+hashBlockCalls(const DenseMap<uint32_t, std::string *> &IdsToProfiledFunctions,
+               const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
 
   std::multiset<std::string> FunctionNames;
   for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) {
     auto It = IdsToProfiledFunctions.find(CallSiteInfo.DestId);
     assert(It != IdsToProfiledFunctions.end() &&
            "All profiled functions should have ids");
-    StringRef Name = It->second->Name;
+    StringRef Name = *It->second;
     const size_t Pos = Name.find("(*");
     if (Pos != StringRef::npos)
       Name = Name.substr(0, Pos);
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index cec7ced707ba0..32dfc89e6c9e0 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -188,7 +188,7 @@ class StaleMatcher {
   /// Initialize stale matcher.
   void init(const std::vector<FlowBlock *> &Blocks,
             const std::vector<BlendedBlockHash> &Hashes,
-            const std::vector<std::unique_ptr<uint64_t>> &CallHashes) {
+            const std::vector<uint64_t> &CallHashes) {
     assert(Blocks.size() == Hashes.size() &&
            Hashes.size() == CallHashes.size() &&
            "incorrect matcher initialization");
@@ -196,15 +196,15 @@ class StaleMatcher {
       FlowBlock *Block = Blocks[I];
       uint16_t OpHash = Hashes[I].OpcodeHash;
       OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block));
-      if (CallHashes[I].get())
-        CallHashToBlocks[*CallHashes[I]].push_back(
+      if (CallHashes[I])
+        CallHashToBlocks[CallHashes[I]].push_back(
             std::make_pair(Hashes[I], Block));
     }
   }
 
   /// Find the most similar block for a given hash.
   const FlowBlock *matchBlock(BlendedBlockHash &BlendedHash,
-                              std::unique_ptr<uint64_t> &CallHash) const {
+                              uint64_t &CallHash) const {
     const FlowBlock *BestBlock = matchWithOpcodes(BlendedHash);
     return BestBlock ? BestBlock : matchWithCalls(BlendedHash, CallHash);
   }
@@ -239,10 +239,10 @@ class StaleMatcher {
   }
 
   const FlowBlock *matchWithCalls(BlendedBlockHash &BlendedHash,
-                                  std::unique_ptr<uint64_t> &CallHash) const {
-    if (!CallHash.get())
+                                  uint64_t CallHash) const {
+    if (!CallHash)
       return nullptr;
-    auto BlockIt = CallHashToBlocks.find(*CallHash);
+    auto BlockIt = CallHashToBlocks.find(CallHash);
     if (BlockIt == CallHashToBlocks.end())
       return nullptr;
     FlowBlock *BestBlock = nullptr;
@@ -422,33 +422,30 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 /// of the basic blocks in the binary, the count is "matched" to the block.
 /// Similarly, if both the source and the target of a count in the profile are
 /// matched to a jump in the binary, the count is recorded in CFG.
-void matchWeightsByHashes(BinaryContext &BC,
-                          const BinaryFunction::BasicBlockOrderType &BlockOrder,
-                          const yaml::bolt::BinaryProfile &YamlBP,
-                          const yaml::bolt::BinaryFunctionProfile &YamlBF,
-                          FlowFunction &Func) {
+void matchWeightsByHashes(
+    BinaryContext &BC,
+    const DenseMap<uint32_t, std::string *> &IdToFunctionName,
+    const BinaryFunction::BasicBlockOrderType &BlockOrder,
+    const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func,
+    HashFunction HashFunction) {
 
   assert(Func.Blocks.size() == BlockOrder.size() + 1);
 
-  std::vector<std::unique_ptr<uint64_t>> CallHashes;
+  std::vector<uint64_t> CallHashes;
   std::vector<FlowBlock *> Blocks;
   std::vector<BlendedBlockHash> BlendedHashes;
 
-  HashFunction HashFunction = YamlBP.Header.HashFunction;
-
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
     const BinaryBasicBlock *BB = BlockOrder[I];
     assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
 
     std::string CallHashStr = hashBlockCalls(BC, *BB);
     if (CallHashStr.empty()) {
-      CallHashes.emplace_back(nullptr);
+      CallHashes.push_back(0);
     } else if (HashFunction == HashFunction::StdHash) {
-      CallHashes.emplace_back(
-          std::make_unique<uint64_t>(std::hash<std::string>{}(CallHashStr)));
+      CallHashes.push_back(std::hash<std::string>{}(CallHashStr));
     } else if (HashFunction == HashFunction::XXH3) {
-      CallHashes.emplace_back(
-          std::make_unique<uint64_t>(llvm::xxh3_64bits(CallHashStr)));
+      CallHashes.push_back(llvm::xxh3_64bits(CallHashStr));
     } else {
       llvm_unreachable("Unhandled HashFunction");
     }
@@ -471,14 +468,13 @@ void matchWeightsByHashes(BinaryContext &BC,
     BlendedBlockHash YamlHash(YamlBB.Hash);
 
     const FlowBlock *MatchedBlock = nullptr;
-    std::string CallHashStr =
-        hashBlockCalls(YamlBP.IdToFunctionProfile, YamlBB);
-    std::unique_ptr<uint64_t> CallHash = nullptr;
-    if (HashFunction == HashFunction::StdHash) {
-      CallHash =
-          std::make_unique<uint64_t>(std::hash<std::string>{}(CallHashStr));
+    std::string CallHashStr = hashBlockCalls(IdToFunctionName, YamlBB);
+    uint64_t CallHash = 0;
+    if (CallHashStr.empty()) { // Noop
+    } else if (HashFunction == HashFunction::StdHash) {
+      CallHash = std::hash<std::string>{}(CallHashStr);
     } else if (HashFunction == HashFunction::XXH3) {
-      CallHash = std::make_unique<uint64_t>(llvm::xxh3_64bits(CallHashStr));
+      CallHash = llvm::xxh3_64bits(CallHashStr);
     } else {
       llvm_unreachable("Unhandled HashFunction");
     }
@@ -773,6 +769,7 @@ void assignProfile(BinaryFunction &BF,
 }
 
 bool YAMLProfileReader::inferStaleProfile(
+    const DenseMap<uint32_t, std::string *> IdToFunctionName,
     BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
 
   NamedRegionTimer T("inferStaleProfile", "stale profile inference", "rewrite",
@@ -794,7 +791,8 @@ bool YAMLProfileReader::inferStaleProfile(
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBP, YamlBF, Func);
+  matchWeightsByHashes(BF.getBinaryContext(), IdToFunctionName, BlockOrder,
+                       YamlBF, Func, YamlBP.Header.HashFunction);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 085596daebaed..2734f99e26c01 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -79,6 +79,7 @@ bool YAMLProfileReader::hasLocalsWithFileName() const {
 }
 
 bool YAMLProfileReader::parseFunctionProfile(
+    const DenseMap<uint32_t, std::string *> &IdToFunctionName,
     BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
   BinaryContext &BC = BF.getBinaryContext();
 
@@ -270,7 +271,8 @@ bool YAMLProfileReader::parseFunctionProfile(
     if (YamlBF.NumBasicBlocks != BF.size())
       ++BC.Stats.NumStaleFuncsWithEqualBlockCount;
 
-    if (opts::InferStaleProfile && inferStaleProfile(BF, YamlBF))
+    if (opts::InferStaleProfile &&
+        inferStaleProfile(IdToFunctionName, BF, YamlBF))
       ProfileMatched = true;
   }
   if (ProfileMatched)
@@ -325,10 +327,6 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
     }
   }
 
-  // Map profiled function ids to names.
-  for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
-    YamlBP.IdToFunctionProfile[YamlBF.Id] = &YamlBF;
-
   return Error::success();
 }
 
@@ -428,6 +426,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
   NormalizeByCalls = usesEvent("branches");
 
+  // Map profiled function ids to names.
+  DenseMap<uint32_t, std::string *> IdToFunctionName;
+  for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
+    IdToFunctionName[YamlBF.Id] = &YamlBF.Name;
+
   uint64_t NumUnused = 0;
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
     if (YamlBF.Id >= YamlProfileToFunction.size()) {
@@ -436,7 +439,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       continue;
     }
     if (BinaryFunction *BF = YamlProfileToFunction[YamlBF.Id])
-      parseFunctionProfile(*BF, YamlBF);
+      parseFunctionProfile(IdToFunctionName, *BF, YamlBF);
     else
       ++NumUnused;
   }

>From 5543fa80fe1eea150f9a62900024d46d6920696b Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 2 Jul 2024 09:22:18 -0700
Subject: [PATCH 8/9] Added test, fixed small bug

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp     |   4 +-
 ...match-functions-with-calls-as-anchors.test | 162 ++++++++++++++++++
 2 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/X86/match-functions-with-calls-as-anchors.test

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 32dfc89e6c9e0..13de1b9a64710 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -248,7 +248,9 @@ class StaleMatcher {
     FlowBlock *BestBlock = nullptr;
     uint64_t BestDist = std::numeric_limits<uint64_t>::max();
     for (const auto &[Hash, Block] : BlockIt->second) {
-      uint64_t Dist = Hash.distance(BlendedHash);
+      uint64_t Dist = Hash.OpcodeHash > BlendedHash.OpcodeHash
+                          ? Hash.OpcodeHash - BlendedHash.OpcodeHash
+                          : BlendedHash.OpcodeHash - Hash.OpcodeHash;
       if (BestBlock == nullptr || Dist < BestDist) {
         BestDist = Dist;
         BestBlock = Block;
diff --git a/bolt/test/X86/match-functions-with-calls-as-anchors.test b/bolt/test/X86/match-functions-with-calls-as-anchors.test
new file mode 100644
index 0000000000000..1bbcc463da17a
--- /dev/null
+++ b/bolt/test/X86/match-functions-with-calls-as-anchors.test
@@ -0,0 +1,162 @@
+## Tests matching blocks by called function names.
+
+# REQUIRES: system-linux
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml --profile-ignore-hash -v=1 \
+# RUN:   --dyno-stats --print-cfg --infer-stale-profile=1 --debug 2>&1 | FileCheck %s
+
+# CHECK: BOLT-INFO: applying profile inference for "qux"
+# CHECK: Matched yaml block (bid = 1) with hash 4 to BB (index = 0) with hash 314e1bc10000
+# CHECK:  loose match
+
+# CHECK: BOLT-INFO: applying profile inference for "fred"
+# CHECK: Matched yaml block (bid = 1) with hash 5 to BB (index = 0) with hash 7541bc10000
+# CHECK:  loose match
+
+#--- main.s
+.globl  foo                         # -- Begin function foo
+  .p2align        4, 0x90
+  .type   foo, at function
+foo:                                # @foo
+# %bb.0:
+  pushq   %rbp
+  movq    %rsp, %rbp
+  popq    %rbp
+  retq
+.Lfunc_end0:
+  .size   foo, .Lfunc_end0-foo
+                                      # -- End function
+  .globl  bar                         # -- Begin function bar
+  .p2align        4, 0x90
+  .type   bar, at function
+bar:                                # @bar
+# %bb.0:
+  pushq   %rbp
+  movq    %rsp, %rbp
+  popq    %rbp
+  retq
+.Lfunc_end1:
+  .size   bar, .Lfunc_end1-bar
+                                      # -- End function
+  .globl  qux                         # -- Begin function qux
+  .p2align        4, 0x90
+  .type   qux, at function
+qux:                                # @qux
+# %bb.0:
+  pushq   %rbp
+  movq    %rsp, %rbp
+  callq   foo
+  callq   bar
+  popq    %rbp
+  retq
+.Lfunc_end2:
+  .size   qux, .Lfunc_end2-qux
+                                      # -- End function
+  .globl  fred                        # -- Begin function fred
+  .p2align        4, 0x90
+  .type   fred, at function
+fred:                               # @fred
+# %bb.0:
+  pushq   %rbp
+  movq    %rsp, %rbp
+  callq   foo
+  callq   qux
+  callq   bar
+  callq   bar
+  callq   foo
+  popq    %rbp
+  retq
+.Lfunc_end3:
+  .size   fred, .Lfunc_end3-fred
+                                          # -- End function
+  .globl  main                            # -- Begin function main
+  .p2align        4, 0x90
+  .type   main, at function
+main:                                   # @main
+# %bb.0:
+  pushq   %rbp
+  movq    %rsp, %rbp
+  xorl    %eax, %eax
+  popq    %rbp
+  retq
+.Lfunc_end4:
+  .size   main, .Lfunc_end4-main
+                                  # -- End function
+  .addrsig
+  .addrsig_sym foo
+  .addrsig_sym bar
+  .addrsig_sym qux
+
+#--- yaml
+---
+header:
+  profile-version: 1
+  binary-name:     'match-functions-with-calls-as-anchors.s.tmp.exe'
+  binary-build-id: '<unknown>'
+  profile-flags:   [ lbr ]
+  profile-origin:  branch profile reader
+  profile-events:  ''
+  dfs-order:       false
+  hash-func:       xxh3
+functions:
+  - name:            main
+    fid:             0
+    hash:            0x0000000000000001
+    exec:            1
+    nblocks:         6
+    blocks:
+      - bid:             1
+        hash:            0x0000000000000001
+        insns:           1
+        succ:            [ { bid: 3, cnt: 1} ]
+  - name:            foo
+    fid:             1
+    hash:            0x0000000000000002
+    exec:            1
+    nblocks:         6
+    blocks:
+      - bid:             1
+        hash:            0x0000000000000002
+        insns:           1
+        succ:            [ { bid: 3, cnt: 1} ]
+
+  - name:            bar
+    fid:             2
+    hash:            0x0000000000000003
+    exec:            1
+    nblocks:         6
+    blocks:
+      - bid:             1
+        hash:            0x0000000000000003
+        insns:           1
+        succ:            [ { bid: 3, cnt: 1} ]
+  - name:            qux
+    fid:             3
+    hash:            0x0000000000000004
+    exec:            4
+    nblocks:         6
+    blocks:
+      - bid:             1
+        hash:            0x0000000000000004
+        insns:           1
+        succ:            [ { bid: 3, cnt: 1} ]
+        calls:           [ { off : 0, fid : 1, cnt : 0},
+                           { off : 0, fid : 2, cnt : 0} ]
+  - name:            fred
+    fid:             4
+    hash:            0x0000000000000005
+    exec:            1
+    nblocks:         6
+    blocks:
+      - bid:             1
+        hash:            0x0000000000000005
+        insns:           1
+        succ:            [ { bid: 3, cnt: 1} ]
+        calls:           [ { off : 0, fid : 3, cnt : 0},
+                           { off : 0, fid : 1, cnt : 0},
+                           { off : 0, fid : 2, cnt : 0},
+                           { off : 0, fid : 1, cnt : 0},
+                           { off : 0, fid : 2, cnt : 0} ]
+...

>From f2c4738f0869392ea2a17c6d8127d40b91af5d3f Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 2 Jul 2024 09:33:05 -0700
Subject: [PATCH 9/9] Comments

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp                | 4 ++--
 bolt/test/X86/match-functions-with-calls-as-anchors.test | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 13de1b9a64710..afd3be9845649 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -222,6 +222,7 @@ class StaleMatcher {
   std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
   std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
 
+  // Uses OpcodeHash to find the most similar block for a given hash.
   const FlowBlock *matchWithOpcodes(BlendedBlockHash &BlendedHash) const {
     auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
     if (BlockIt == OpHashToBlocks.end())
@@ -238,6 +239,7 @@ class StaleMatcher {
     return BestBlock;
   }
 
+  // Uses CallHash to find the most similar block for a given hash.
   const FlowBlock *matchWithCalls(BlendedBlockHash &BlendedHash,
                                   uint64_t CallHash) const {
     if (!CallHash)
@@ -436,7 +438,6 @@ void matchWeightsByHashes(
   std::vector<uint64_t> CallHashes;
   std::vector<FlowBlock *> Blocks;
   std::vector<BlendedBlockHash> BlendedHashes;
-
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
     const BinaryBasicBlock *BB = BlockOrder[I];
     assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
@@ -458,7 +459,6 @@ void matchWeightsByHashes(
     LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
                       << Twine::utohexstr(BB->getHash()) << "\n");
   }
-
   StaleMatcher Matcher;
   Matcher.init(Blocks, BlendedHashes, CallHashes);
 
diff --git a/bolt/test/X86/match-functions-with-calls-as-anchors.test b/bolt/test/X86/match-functions-with-calls-as-anchors.test
index 1bbcc463da17a..7fef128453a07 100644
--- a/bolt/test/X86/match-functions-with-calls-as-anchors.test
+++ b/bolt/test/X86/match-functions-with-calls-as-anchors.test
@@ -1,4 +1,4 @@
-## Tests matching blocks by called function names.
+## Tests blocks matching by called function names in inferStaleProfile.
 
 # REQUIRES: system-linux
 # RUN: split-file %s %t



More information about the llvm-branch-commits mailing list