[llvm] [BOLT] Discard BB profiles with a hash of 0 in yaml from a Post-BAT binary (PR #169627)

Jinjie Huang via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 3 04:50:03 PST 2025


https://github.com/Jinjie-Huang updated https://github.com/llvm/llvm-project/pull/169627

>From f9bb4b70a705ae2e1268d4eb1f9ea983a5e5bce5 Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Wed, 26 Nov 2025 18:11:30 +0800
Subject: [PATCH 1/2] discard BB profiles with a hash of 0

---
 bolt/lib/Profile/DataAggregator.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 6b969011df589..73baceae7af55 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2417,6 +2417,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
       // Skip printing if there's no profile data
       llvm::erase_if(
           YamlBF.Blocks, [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
+            if ((size_t)YamlBB.Hash == 0)
+              return true;
             auto HasCount = [](const auto &SI) { return SI.Count; };
             bool HasAnyCount = YamlBB.ExecCount ||
                                llvm::any_of(YamlBB.Successors, HasCount) ||

>From c8b31b7a74b6e252737a05a586be10a207a848ab Mon Sep 17 00:00:00 2001
From: huangjinjie <huangjinjie at bytedance.com>
Date: Wed, 3 Dec 2025 20:49:29 +0800
Subject: [PATCH 2/2] modify to drop entries missing metadata in BAT writer

---
 .../bolt/Profile/BoltAddressTranslation.h     |  6 ++++
 bolt/lib/Profile/BoltAddressTranslation.cpp   | 31 ++++++++++++++++++-
 bolt/lib/Profile/DataAggregator.cpp           |  2 --
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h
index fcc578f35e322..f6465a6a30a7a 100644
--- a/bolt/include/bolt/Profile/BoltAddressTranslation.h
+++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h
@@ -182,6 +182,8 @@ class BoltAddressTranslation {
   /// translation map entry
   const static uint32_t BRANCHENTRY = 0x1;
 
+  uint64_t NumInvalidEntries = 0;
+
 public:
   /// Map basic block input offset to a basic block index and hash pair.
   class BBHashMapTy {
@@ -283,6 +285,10 @@ class BoltAddressTranslation {
 
 private:
   FuncHashesTy FuncHashes;
+
+  /// Filters out invalid entries from the BAT map.
+  void dropInvalidEntries(MapTy &Map, uint64_t Address,
+                          const BBHashMapTy &BBHashMap);
 };
 } // namespace bolt
 
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
index 7ad4e6a2e1411..310d7d940e36b 100644
--- a/bolt/lib/Profile/BoltAddressTranslation.cpp
+++ b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -8,6 +8,7 @@
 
 #include "bolt/Profile/BoltAddressTranslation.h"
 #include "bolt/Core/BinaryFunction.h"
+#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
@@ -145,6 +146,11 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
   uint64_t PrevAddress = 0;
   writeMaps</*Cold=*/false>(PrevAddress, OS);
   writeMaps</*Cold=*/true>(PrevAddress, OS);
+  // Summarize how many entries dropInvalidEntries discarded while writing.
+  if (NumInvalidEntries > 0)
+    BC.errs() << "BOLT-WARNING: " << NumInvalidEntries
+              << " BAT entries were ignored due to missing metadata (possibly "
+                 "inserted by optimizations)\n";
 
   BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
   BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.getNumFunctions()
@@ -181,6 +187,28 @@ size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map,
   return EqualOffsets;
 }
 
+// Erase non-branch entries whose input offset fails BBHashMap.isInputBlock(),
+// counting each in NumInvalidEntries; logs every drop when Verbosity >= 1.
+void BoltAddressTranslation::dropInvalidEntries(MapTy &Map, uint64_t Address,
+                                                const BBHashMapTy &BBHashMap) {
+  for (auto It = Map.begin(); It != Map.end();) {
+    const auto OutOff = It->first;
+    const auto InOff = It->second >> 1;
+    const bool IsBranch = It->second & BoltAddressTranslation::BRANCHENTRY;
+    if (IsBranch || BBHashMap.isInputBlock(InOff)) {
+      ++It;
+      continue;
+    }
+    ++NumInvalidEntries;
+    if (opts::Verbosity >= 1)
+      errs() << "BOLT-WARNING: ignoring BAT mapping: "
+             << "OutputOffset: 0x" << Twine::utohexstr(OutOff)
+             << ", InputOffset: 0x" << Twine::utohexstr(InOff)
+             << ", at Address: 0x" << Twine::utohexstr(Address) << "\n";
+    It = Map.erase(It);
+  }
+}
+
 template <bool Cold>
 void BoltAddressTranslation::writeMaps(uint64_t &PrevAddress, raw_ostream &OS) {
   const uint32_t NumFuncs =
@@ -201,6 +229,8 @@ void BoltAddressTranslation::writeMaps(uint64_t &PrevAddress, raw_ostream &OS) {
     const uint64_t HotInputAddress =
         ReverseMap[Cold ? ColdPartSource[Address] : Address];
     MapTy &Map = MapEntry.second;
+    const BBHashMapTy &BBHashMap = getBBHashMap(HotInputAddress);
+    dropInvalidEntries(Map, Address, BBHashMap);
     const uint32_t NumEntries = Map.size();
     LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
                       << Twine::utohexstr(Address) << ".\n");
@@ -253,7 +283,6 @@ void BoltAddressTranslation::writeMaps(uint64_t &PrevAddress, raw_ostream &OS) {
         dbgs() << BitMaskStr << '\n';
       });
     }
-    const BBHashMapTy &BBHashMap = getBBHashMap(HotInputAddress);
     size_t Index = 0;
     uint64_t InOffset = 0;
     size_t PrevBBIndex = 0;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 73baceae7af55..6b969011df589 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2417,8 +2417,6 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
       // Skip printing if there's no profile data
       llvm::erase_if(
           YamlBF.Blocks, [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
-            if ((size_t)YamlBB.Hash == 0)
-              return true;
             auto HasCount = [](const auto &SI) { return SI.Count; };
             bool HasAnyCount = YamlBB.ExecCount ||
                                llvm::any_of(YamlBB.Successors, HasCount) ||



More information about the llvm-commits mailing list