[llvm] [BOLT] Cover all call sites in writeBATYAML (PR #87743)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 11 12:14:25 PDT 2024


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/87743

>From c97c6553f9ec84093e7c78650596fdd7aa3461f5 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 4 Apr 2024 21:31:18 -0700
Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
 =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 .../bolt/Profile/BoltAddressTranslation.h     |  10 ++
 bolt/include/bolt/Profile/DataAggregator.h    |  15 +-
 bolt/include/bolt/Profile/YAMLProfileWriter.h |  13 +-
 bolt/lib/Profile/BoltAddressTranslation.cpp   |  51 +++++-
 bolt/lib/Profile/DataAggregator.cpp           | 155 +++++++-----------
 bolt/lib/Profile/YAMLProfileWriter.cpp        |  24 ++-
 .../X86/bolt-address-translation-yaml.test    |   7 +-
 .../X86/yaml-secondary-entry-discriminator.s  |  56 ++++++-
 8 files changed, 203 insertions(+), 128 deletions(-)

diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h
index caf907cc43da3e..92c23b9d909b12 100644
--- a/bolt/include/bolt/Profile/BoltAddressTranslation.h
+++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h
@@ -19,6 +19,7 @@
 #include <unordered_map>
 
 namespace llvm {
+class MCSymbol;
 class raw_ostream;
 
 namespace object {
@@ -123,6 +124,12 @@ class BoltAddressTranslation {
   std::unordered_map<uint32_t, std::vector<uint32_t>>
   getBFBranches(uint64_t FuncOutputAddress) const;
 
+  /// For a given \p Symbol in the output binary, returns a corresponding pair
+  /// of parent BinaryFunction and secondary entry point in it.
+  std::pair<const BinaryFunction *, unsigned>
+  translateSymbol(const BinaryContext &BC, const MCSymbol &Symbol,
+                  uint32_t Offset) const;
+
 private:
   /// Helper to update \p Map by inserting one or more BAT entries reflecting
   /// \p BB for function located at \p FuncAddress. At least one entry will be
@@ -158,6 +165,9 @@ class BoltAddressTranslation {
   /// Map a function to its secondary entry points vector
   std::unordered_map<uint64_t, std::vector<uint32_t>> SecondaryEntryPointsMap;
 
+  /// Returns a secondary entry point id for a given \p Address and \p Offset.
+  unsigned getSecondaryEntryPointId(uint64_t Address, uint32_t Offset) const;
+
   /// Links outlined cold bocks to their original function
   std::map<uint64_t, uint64_t> ColdPartSource;
 
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 4fbe524b1c385d..659a8488d1534c 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -245,14 +245,12 @@ class DataAggregator : public DataReader {
   /// disassembled BinaryFunctions
   BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address) const;
 
+  /// Perform BAT translation for a given \p Func and return the parent
+  /// BinaryFunction or nullptr.
+  BinaryFunction *getParentFunction(const BinaryFunction &Func) const;
+
   /// Retrieve the location name to be used for samples recorded in \p Func.
-  /// If doing BAT translation, link cold parts to the hot part  names (used by
-  /// the original binary).  \p Count specifies how many samples were recorded
-  /// at that location, so we can tally total activity in cold areas if we are
-  /// dealing with profiling data collected in a bolted binary. For LBRs,
-  /// \p Count should only be used for the source of the branch to avoid
-  /// counting cold activity twice (one for source and another for destination).
-  StringRef getLocationName(BinaryFunction &Func, uint64_t Count);
+  StringRef getLocationName(const BinaryFunction &Func) const;
 
   /// Semantic actions - parser hooks to interpret parsed perf samples
   /// Register a sample (non-LBR mode), i.e. a new hit at \p Address
@@ -467,9 +465,6 @@ class DataAggregator : public DataReader {
   std::error_code writeBATYAML(BinaryContext &BC,
                                StringRef OutputFilename) const;
 
-  /// Fixup profile collected on BOLTed binary, namely handle split functions.
-  void fixupBATProfile(BinaryContext &BC);
-
   /// Filter out binaries based on PID
   void filterBinaryMMapInfo();
 
diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h
index 882748627e7f54..4a9355dfceac9e 100644
--- a/bolt/include/bolt/Profile/YAMLProfileWriter.h
+++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h
@@ -15,6 +15,7 @@
 
 namespace llvm {
 namespace bolt {
+class BoltAddressTranslation;
 class RewriteInstance;
 
 class YAMLProfileWriter {
@@ -31,8 +32,16 @@ class YAMLProfileWriter {
   /// Save execution profile for that instance.
   std::error_code writeProfile(const RewriteInstance &RI);
 
-  static yaml::bolt::BinaryFunctionProfile convert(const BinaryFunction &BF,
-                                                   bool UseDFS);
+  static yaml::bolt::BinaryFunctionProfile
+  convert(const BinaryFunction &BF, bool UseDFS,
+          const BoltAddressTranslation *BAT = nullptr);
+
+  /// Set CallSiteInfo destination fields from \p Symbol and return a target
+  /// BinaryFunction for that symbol.
+  static const BinaryFunction *
+  setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
+                    const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
+                    uint32_t Offset = 0);
 };
 
 } // namespace bolt
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
index bcd4a457ce3b49..90c62c13dcaac7 100644
--- a/bolt/lib/Profile/BoltAddressTranslation.cpp
+++ b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -100,7 +100,7 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
     LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
     LLVM_DEBUG(dbgs() << " Address reference: 0x"
                       << Twine::utohexstr(Function.getOutputAddress()) << "\n");
-    LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBFHash(OutputAddress)));
+    LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBFHash(InputAddress)));
     LLVM_DEBUG(dbgs() << " Secondary Entry Points: " << NumSecondaryEntryPoints
                       << '\n');
 
@@ -197,8 +197,9 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
             ? SecondaryEntryPointsMap[Address].size()
             : 0;
     if (Cold) {
-      size_t HotIndex =
-          std::distance(ColdPartSource.begin(), ColdPartSource.find(Address));
+      auto HotEntryIt = Maps.find(ColdPartSource[Address]);
+      assert(HotEntryIt != Maps.end());
+      size_t HotIndex = std::distance(Maps.begin(), HotEntryIt);
       encodeULEB128(HotIndex - PrevIndex, OS);
       PrevIndex = HotIndex;
     } else {
@@ -597,5 +598,49 @@ BoltAddressTranslation::getBFBranches(uint64_t OutputAddress) const {
   return Branches;
 }
 
+unsigned
+BoltAddressTranslation::getSecondaryEntryPointId(uint64_t Address,
+                                                 uint32_t Offset) const {
+  auto FunctionIt = SecondaryEntryPointsMap.find(Address);
+  if (FunctionIt == SecondaryEntryPointsMap.end())
+    return 0;
+  const std::vector<uint32_t> &Offsets = FunctionIt->second;
+  auto OffsetIt = std::find(Offsets.begin(), Offsets.end(), Offset);
+  if (OffsetIt == Offsets.end())
+    return 0;
+  // Adding one here because main entry point is not stored in BAT, and
+  // enumeration for secondary entry points starts with 1.
+  return OffsetIt - Offsets.begin() + 1;
+}
+
+std::pair<const BinaryFunction *, unsigned>
+BoltAddressTranslation::translateSymbol(const BinaryContext &BC,
+                                        const MCSymbol &Symbol,
+                                        uint32_t Offset) const {
+  // The symbol could be a secondary entry in a cold fragment.
+  uint64_t SymbolValue = cantFail(errorOrToExpected(BC.getSymbolValue(Symbol)));
+
+  const BinaryFunction *Callee = BC.getFunctionForSymbol(&Symbol);
+  assert(Callee);
+
+  // Containing function, not necessarily the same as symbol value.
+  const uint64_t CalleeAddress = Callee->getAddress();
+  const uint32_t OutputOffset = SymbolValue - CalleeAddress;
+
+  const uint64_t ParentAddress = fetchParentAddress(CalleeAddress);
+  const uint64_t HotAddress = ParentAddress ? ParentAddress : CalleeAddress;
+
+  const BinaryFunction *ParentBF = BC.getBinaryFunctionAtAddress(HotAddress);
+
+  const uint32_t InputOffset =
+      translate(CalleeAddress, OutputOffset, /*IsBranchSrc*/ false) + Offset;
+
+  unsigned SecondaryEntryId{0};
+  if (InputOffset)
+    SecondaryEntryId = getSecondaryEntryPointId(HotAddress, InputOffset);
+
+  return std::pair(ParentBF, SecondaryEntryId);
+}
+
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 05099aa25ce227..cd45b352315d79 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -604,8 +604,6 @@ Error DataAggregator::readProfile(BinaryContext &BC) {
     // BAT YAML is handled by DataAggregator since normal YAML output requires
     // CFG which is not available in BAT mode.
     if (usesBAT()) {
-      // Postprocess split function profile for BAT
-      fixupBATProfile(BC);
       if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
         if (std::error_code EC = writeBATYAML(BC, opts::OutputFilename))
           report_error("cannot create output data file", EC);
@@ -664,18 +662,19 @@ DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
                                                 /*UseMaxSize=*/true);
 }
 
-StringRef DataAggregator::getLocationName(BinaryFunction &Func,
-                                          uint64_t Count) {
+BinaryFunction *
+DataAggregator::getParentFunction(const BinaryFunction &Func) const {
+  if (BAT)
+    if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress()))
+      return getBinaryFunctionContainingAddress(HotAddr);
+  return nullptr;
+}
+
+StringRef DataAggregator::getLocationName(const BinaryFunction &Func) const {
   if (!BAT)
     return Func.getOneName();
 
   const BinaryFunction *OrigFunc = &Func;
-  if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
-    NumColdSamples += Count;
-    BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
-    if (HotFunc)
-      OrigFunc = HotFunc;
-  }
   // If it is a local function, prefer the name containing the file name where
   // the local function was declared
   for (StringRef AlternativeName : OrigFunc->getNames()) {
@@ -690,12 +689,17 @@ StringRef DataAggregator::getLocationName(BinaryFunction &Func,
   return OrigFunc->getOneName();
 }
 
-bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
+bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
                               uint64_t Count) {
+  BinaryFunction *ParentFunc = getParentFunction(OrigFunc);
+  BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
+  if (ParentFunc)
+    NumColdSamples += Count;
+
   auto I = NamesToSamples.find(Func.getOneName());
   if (I == NamesToSamples.end()) {
     bool Success;
-    StringRef LocName = getLocationName(Func, Count);
+    StringRef LocName = getLocationName(Func);
     std::tie(I, Success) = NamesToSamples.insert(
         std::make_pair(Func.getOneName(),
                        FuncSampleData(LocName, FuncSampleData::ContainerTy())));
@@ -715,22 +719,12 @@ bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
   FuncBranchData *AggrData = getBranchData(Func);
   if (!AggrData) {
     AggrData = &NamesToBranches[Func.getOneName()];
-    AggrData->Name = getLocationName(Func, Count);
+    AggrData->Name = getLocationName(Func);
     setBranchData(Func, AggrData);
   }
 
-  From -= Func.getAddress();
-  To -= Func.getAddress();
   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
                     << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
-  if (BAT) {
-    From = BAT->translate(Func.getAddress(), From, /*IsBranchSrc=*/true);
-    To = BAT->translate(Func.getAddress(), To, /*IsBranchSrc=*/false);
-    LLVM_DEBUG(
-        dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
-               << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
-  }
-
   AggrData->bumpBranchCount(From, To, Count, Mispreds);
   return true;
 }
@@ -744,30 +738,24 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
   StringRef SrcFunc;
   StringRef DstFunc;
   if (FromFunc) {
-    SrcFunc = getLocationName(*FromFunc, Count);
+    SrcFunc = getLocationName(*FromFunc);
     FromAggrData = getBranchData(*FromFunc);
     if (!FromAggrData) {
       FromAggrData = &NamesToBranches[FromFunc->getOneName()];
       FromAggrData->Name = SrcFunc;
       setBranchData(*FromFunc, FromAggrData);
     }
-    From -= FromFunc->getAddress();
-    if (BAT)
-      From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
 
     recordExit(*FromFunc, From, Mispreds, Count);
   }
   if (ToFunc) {
-    DstFunc = getLocationName(*ToFunc, 0);
+    DstFunc = getLocationName(*ToFunc);
     ToAggrData = getBranchData(*ToFunc);
     if (!ToAggrData) {
       ToAggrData = &NamesToBranches[ToFunc->getOneName()];
       ToAggrData->Name = DstFunc;
       setBranchData(*ToFunc, ToAggrData);
     }
-    To -= ToFunc->getAddress();
-    if (BAT)
-      To = BAT->translate(ToFunc->getAddress(), To, /*IsBranchSrc=*/false);
 
     recordEntry(*ToFunc, To, Mispreds, Count);
   }
@@ -783,15 +771,32 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
 
 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
                               uint64_t Mispreds) {
-  BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
-  BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
+  auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
+    if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) {
+      Addr -= Func->getAddress();
+
+      if (BAT)
+        Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
+
+      if (BinaryFunction *ParentFunc = getParentFunction(*Func)) {
+        Func = ParentFunc;
+        if (IsFrom)
+          NumColdSamples += Count;
+      }
+
+      return Func;
+    }
+    return nullptr;
+  };
+
+  BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
+  BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
   if (!FromFunc && !ToFunc)
     return false;
 
   // Treat recursive control transfers as inter-branches.
-  if (FromFunc == ToFunc && (To != ToFunc->getAddress())) {
-    recordBranch(*FromFunc, From - FromFunc->getAddress(),
-                 To - FromFunc->getAddress(), Count, Mispreds);
+  if (FromFunc == ToFunc && To != 0) {
+    recordBranch(*FromFunc, From, To, Count, Mispreds);
     return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
   }
 
@@ -842,9 +847,14 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
                     << FromFunc->getPrintName() << ":"
                     << Twine::utohexstr(First.To) << " to "
                     << Twine::utohexstr(Second.From) << ".\n");
-  for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
-    doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
-                  Pair.second + FromFunc->getAddress(), Count, false);
+  BinaryFunction *ParentFunc = getParentFunction(*FromFunc);
+  for (auto [From, To] : *FTs) {
+    if (BAT) {
+      From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
+      To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
+    }
+    doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
+  }
 
   return true;
 }
@@ -2273,29 +2283,6 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
   return std::error_code();
 }
 
-void DataAggregator::fixupBATProfile(BinaryContext &BC) {
-  for (auto &[FuncName, Branches] : NamesToBranches) {
-    BinaryData *BD = BC.getBinaryDataByName(FuncName);
-    assert(BD);
-    uint64_t FuncAddress = BD->getAddress();
-    if (!BAT->isBATFunction(FuncAddress))
-      continue;
-    // Filter out cold fragments
-    if (!BD->getSectionName().equals(BC.getMainCodeSectionName()))
-      continue;
-    // Convert inter-branches between hot and cold fragments into
-    // intra-branches.
-    for (auto &[OffsetFrom, CallToMap] : Branches.InterIndex) {
-      for (auto &[CallToLoc, CallToIdx] : CallToMap) {
-        if (CallToLoc.Name != FuncName)
-          continue;
-        Branches.IntraIndex[OffsetFrom][CallToLoc.Offset] = CallToIdx;
-        Branches.InterIndex[OffsetFrom].erase(CallToLoc);
-      }
-    }
-  }
-}
-
 std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
                                              StringRef OutputFilename) const {
   std::error_code EC;
@@ -2333,7 +2320,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
       if (BAT->isBATFunction(Function.getAddress()))
         continue;
       BP.Functions.emplace_back(
-          YAMLProfileWriter::convert(Function, /*UseDFS=*/false));
+          YAMLProfileWriter::convert(Function, /*UseDFS=*/false, BAT));
     }
 
     for (const auto &KV : NamesToBranches) {
@@ -2345,9 +2332,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
       uint64_t FuncAddress = BD->getAddress();
       if (!BAT->isBATFunction(FuncAddress))
         continue;
-      // Filter out cold fragments
-      if (!BD->getSectionName().equals(BC.getMainCodeSectionName()))
-        continue;
+      // Cold fragments must be handled by doBranch.
+      assert(BD->getSectionName().equals(BC.getMainCodeSectionName()));
       BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncAddress);
       assert(BF);
       YamlBF.Name = FuncName.str();
@@ -2386,40 +2372,9 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
             YamlCSI.Count = BI.Branches;
             YamlCSI.Mispreds = BI.Mispreds;
             YamlCSI.Offset = BranchOffset - Offset;
-            BinaryData *CallTargetBD = BC.getBinaryDataByName(CallToLoc.Name);
-            if (!CallTargetBD) {
-              YamlBB.CallSites.emplace_back(YamlCSI);
-              continue;
-            }
-            uint64_t CallTargetAddress = CallTargetBD->getAddress();
-            BinaryFunction *CallTargetBF =
-                BC.getBinaryFunctionAtAddress(CallTargetAddress);
-            if (!CallTargetBF) {
-              YamlBB.CallSites.emplace_back(YamlCSI);
-              continue;
-            }
-            // Calls between hot and cold fragments must be handled in
-            // fixupBATProfile.
-            assert(CallTargetBF != BF && "invalid CallTargetBF");
-            YamlCSI.DestId = CallTargetBF->getFunctionNumber();
-            if (CallToLoc.Offset) {
-              if (BAT->isBATFunction(CallTargetAddress)) {
-                LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Unsupported secondary "
-                                     "entry point in BAT function "
-                                  << CallToLoc.Name << '\n');
-              } else if (const BinaryBasicBlock *CallTargetBB =
-                             CallTargetBF->getBasicBlockAtOffset(
-                                 CallToLoc.Offset)) {
-                // Only record true call information, ignoring returns (normally
-                // won't have a target basic block) and jumps to the landing
-                // pads (not an entry point).
-                if (CallTargetBB->isEntryPoint()) {
-                  YamlCSI.EntryDiscriminator =
-                      CallTargetBF->getEntryIDForSymbol(
-                          CallTargetBB->getLabel());
-                }
-              }
-            }
+            if (BinaryData *BD = BC.getBinaryDataByName(CallToLoc.Name))
+              YAMLProfileWriter::setCSIDestination(BC, YamlCSI, BD->getSymbol(),
+                                                   BAT, CallToLoc.Offset);
             YamlBB.CallSites.emplace_back(YamlCSI);
           }
         }
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index 0f082086c1fc24..f1cdfc09a8a2f7 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -9,9 +9,11 @@
 #include "bolt/Profile/YAMLProfileWriter.h"
 #include "bolt/Core/BinaryBasicBlock.h"
 #include "bolt/Core/BinaryFunction.h"
+#include "bolt/Profile/BoltAddressTranslation.h"
 #include "bolt/Profile/ProfileReaderBase.h"
 #include "bolt/Rewrite/RewriteInstance.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -25,17 +27,19 @@ extern llvm::cl::opt<bool> ProfileUseDFS;
 namespace llvm {
 namespace bolt {
 
-/// Set CallSiteInfo destination fields from \p Symbol and return a target
-/// BinaryFunction for that symbol.
-static const BinaryFunction *setCSIDestination(const BinaryContext &BC,
-                                               yaml::bolt::CallSiteInfo &CSI,
-                                               const MCSymbol *Symbol) {
+const BinaryFunction *YAMLProfileWriter::setCSIDestination(
+    const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
+    const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
+    uint32_t Offset) {
   CSI.DestId = 0; // designated for unknown functions
   CSI.EntryDiscriminator = 0;
+
   if (Symbol) {
     uint64_t EntryID = 0;
-    if (const BinaryFunction *const Callee =
+    if (const BinaryFunction *Callee =
             BC.getFunctionForSymbol(Symbol, &EntryID)) {
+      if (BAT && BAT->isBATFunction(Callee->getAddress()))
+        std::tie(Callee, EntryID) = BAT->translateSymbol(BC, *Symbol, Offset);
       CSI.DestId = Callee->getFunctionNumber();
       CSI.EntryDiscriminator = EntryID;
       return Callee;
@@ -45,7 +49,8 @@ static const BinaryFunction *setCSIDestination(const BinaryContext &BC,
 }
 
 yaml::bolt::BinaryFunctionProfile
-YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) {
+YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
+                           const BoltAddressTranslation *BAT) {
   yaml::bolt::BinaryFunctionProfile YamlBF;
   const BinaryContext &BC = BF.getBinaryContext();
 
@@ -98,7 +103,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) {
           continue;
         for (const IndirectCallProfile &CSP : ICSP.get()) {
           StringRef TargetName = "";
-          const BinaryFunction *Callee = setCSIDestination(BC, CSI, CSP.Symbol);
+          const BinaryFunction *Callee =
+              setCSIDestination(BC, CSI, CSP.Symbol, BAT);
           if (Callee)
             TargetName = Callee->getOneName();
           CSI.Count = CSP.Count;
@@ -109,7 +115,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) {
         StringRef TargetName = "";
         const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr);
         const BinaryFunction *const Callee =
-            setCSIDestination(BC, CSI, CalleeSymbol);
+            setCSIDestination(BC, CSI, CalleeSymbol, BAT);
         if (Callee)
           TargetName = Callee->getOneName();
 
diff --git a/bolt/test/X86/bolt-address-translation-yaml.test b/bolt/test/X86/bolt-address-translation-yaml.test
index 7fdf7709a8b9da..af24c3d84a0f15 100644
--- a/bolt/test/X86/bolt-address-translation-yaml.test
+++ b/bolt/test/X86/bolt-address-translation-yaml.test
@@ -36,9 +36,14 @@ YAML-BAT-CHECK-NEXT:   - bid:   0
 YAML-BAT-CHECK-NEXT:     insns: 26
 YAML-BAT-CHECK-NEXT:     hash:  0xA900AE79CFD40000
 YAML-BAT-CHECK-NEXT:     succ:  [ { bid: 3, cnt: 0 }, { bid: 1, cnt: 0 } ]
+# Calls from no-BAT to BAT function
+YAML-BAT-CHECK:        - bid:   28
+YAML-BAT-CHECK-NEXT:     insns: 13
+YAML-BAT-CHECK-NEXT:     hash:  0xB2F04C1F25F00400
+YAML-BAT-CHECK-NEXT:     calls: [ { off: 0x21, fid: [[#SOLVECUBIC:]], cnt: 25 }, { off: 0x2D, fid: [[#]], cnt: 9 } ]
 # Function covered by BAT with calls
 YAML-BAT-CHECK:      - name:    SolveCubic
-YAML-BAT-CHECK-NEXT:   fid:     [[#]]
+YAML-BAT-CHECK-NEXT:   fid:     [[#SOLVECUBIC]]
 YAML-BAT-CHECK-NEXT:   hash:    0x6AF7E61EA3966722
 YAML-BAT-CHECK-NEXT:   exec:    25
 YAML-BAT-CHECK-NEXT:   nblocks: 15
diff --git a/bolt/test/X86/yaml-secondary-entry-discriminator.s b/bolt/test/X86/yaml-secondary-entry-discriminator.s
index 43c2e2a7f05549..35e9a079ae0def 100644
--- a/bolt/test/X86/yaml-secondary-entry-discriminator.s
+++ b/bolt/test/X86/yaml-secondary-entry-discriminator.s
@@ -11,17 +11,17 @@
 # RUN: FileCheck %s -input-file %t.yaml
 # CHECK:      - name:    main
 # CHECK-NEXT:   fid:     2
-# CHECK-NEXT:   hash:    0xADF270D550151185
+# CHECK-NEXT:   hash:    {{.*}}
 # CHECK-NEXT:   exec:    0
 # CHECK-NEXT:   nblocks: 4
 # CHECK-NEXT:   blocks:
 # CHECK:          - bid:   1
 # CHECK-NEXT:       insns: 1
-# CHECK-NEXT:       hash:  0x36A303CBA4360014
+# CHECK-NEXT:       hash:  {{.*}}
 # CHECK-NEXT:       calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1 } ]
 # CHECK:          - bid:   2
 # CHECK-NEXT:       insns: 5
-# CHECK-NEXT:       hash:  0x8B2F5747CD0019
+# CHECK-NEXT:       hash:  {{.*}}
 # CHECK-NEXT:       calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1, mis: 1 } ]
 
 # Make sure that the profile is attached correctly
@@ -33,15 +33,61 @@
 # CHECK-CFG:      callq *%rax # Offset: [[#]] # CallProfile: 1 (1 misses) :
 # CHECK-CFG-NEXT:     { secondary_entry: 1 (1 misses) }
 
+# YAML BAT test of calling BAT secondary entry from non-BAT function
+# Now force-split func and skip main (making it call secondary entries)
+# RUN: llvm-bolt %t.exe -o %t.bat --data %t.fdata --funcs=func \
+# RUN:   --split-functions --split-strategy=all --split-all-cold --enable-bat
+
+# Prepare pre-aggregated profile using %t.bat
+# RUN: link_fdata %s %t.bat %t.preagg PREAGG
+# Strip labels used for pre-aggregated profile
+# RUN: llvm-strip -NLcall -NLindcall %t.bat
+
+# Convert pre-aggregated profile using BAT
+# RUN: perf2bolt %t.bat -p %t.preagg --pa -o %t.bat.fdata -w %t.bat.yaml
+
+# Convert BAT fdata into YAML
+# RUN: llvm-bolt %t.exe -data %t.bat.fdata -w %t.bat.fdata-yaml -o /dev/null
+
+# Check fdata YAML - make sure that a direct call has discriminator field
+# RUN: FileCheck %s --input-file %t.bat.fdata-yaml -check-prefix CHECK-BAT-YAML
+
+# Check BAT YAML - make sure that a direct call has discriminator field
+# RUN: FileCheck %s --input-file %t.bat.yaml --check-prefix CHECK-BAT-YAML
+
+# YAML BAT test of calling BAT secondary entry from BAT function
+# RUN: llvm-bolt %t.exe -o %t.bat2 --data %t.fdata --funcs=main,func \
+# RUN:   --split-functions --split-strategy=all --split-all-cold --enable-bat
+
+# CHECK-BAT-YAML:      - name:    main
+# CHECK-BAT-YAML-NEXT:   fid:     2
+# CHECK-BAT-YAML-NEXT:   hash:    0xADF270D550151185
+# CHECK-BAT-YAML-NEXT:   exec:    0
+# CHECK-BAT-YAML-NEXT:   nblocks: 4
+# CHECK-BAT-YAML-NEXT:   blocks:
+# CHECK-BAT-YAML:          - bid:   1
+# CHECK-BAT-YAML-NEXT:       insns: [[#]]
+# CHECK-BAT-YAML-NEXT:       hash:  0x36A303CBA4360018
+# CHECK-BAT-YAML-NEXT:       calls: [ { off: 0x0, fid: [[#]], disc: 1, cnt: 1 } ]
+
 .globl func
 .type	func, @function
 func:
 # FDATA: 0 [unknown] 0 1 func 0 1 0
+# PREAGG: B X:0 #func# 1 1
   .cfi_startproc
   pushq   %rbp
   movq    %rsp, %rbp
+  # Placeholder code to make splitting profitable
+.rept 5
+  testq   %rax, %rax
+.endr
 .globl secondary_entry
 secondary_entry:
+  # Placeholder code to make splitting profitable
+.rept 5
+  testq   %rax, %rax
+.endr
   popq    %rbp
   retq
   nopl    (%rax)
@@ -58,12 +104,16 @@ main:
   movl    $0, -4(%rbp)
   testq   %rax, %rax
   jne     Lindcall
+.globl Lcall
 Lcall:
   call    secondary_entry
 # FDATA: 1 main #Lcall# 1 secondary_entry 0 1 1
+# PREAGG: B #Lcall# #secondary_entry# 1 1
+.globl Lindcall
 Lindcall:
   callq   *%rax
 # FDATA: 1 main #Lindcall# 1 secondary_entry 0 1 1
+# PREAGG: B #Lindcall# #secondary_entry# 1 1
   xorl    %eax, %eax
   addq    $16, %rsp
   popq    %rbp

>From 251020f10d1e6a7a888164748f78acc6994e0ab3 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Fri, 5 Apr 2024 07:36:40 -0700
Subject: [PATCH 2/4] Remove stats logging

Created using spr 1.3.4
---
 bolt/lib/Profile/DataAggregator.cpp | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 119d082c50c18c..1d0737167c808e 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2395,19 +2395,6 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
             !YamlBB.CallSites.empty())
           YamlBF.Blocks.emplace_back(YamlBB);
       }
-
-      for (const auto &[BranchOffset, _] : Branches.InterIndex) {
-        bool Matched =
-            llvm::any_of(llvm::make_second_range(BFBranches),
-                         [&](const std::vector<uint32_t> &BranchOffsets) {
-                           return llvm::is_contained(BranchOffsets,
-                                                     BranchOffset);
-                         });
-        if (!Matched && opts::Verbosity >= 1)
-          errs() << "BOLT-WARNING: Couldn't match call site "
-                 << formatv("{0}@{1:x} to YAML profile\n", FuncName,
-                            BranchOffset);
-      }
       BP.Functions.emplace_back(YamlBF);
     }
   }

>From 47ecafeb5a0dd9f3f4e103145010d335dc494123 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Fri, 5 Apr 2024 14:35:25 -0700
Subject: [PATCH 3/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
 =?UTF-8?q?anges=20introduced=20through=20rebase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 bolt/include/bolt/Profile/DataAggregator.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 659a8488d1534c..3b8624585c19bd 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -225,6 +225,10 @@ class DataAggregator : public DataReader {
   /// Aggregation statistics
   uint64_t NumInvalidTraces{0};
   uint64_t NumLongRangeTraces{0};
+  /// Specifies how many samples were recorded in cold areas if we are dealing
+  /// with profiling data collected in a bolted binary. For LBRs, incremented
+  /// for the source of the branch to avoid counting cold activity twice (one
+  /// for source and another for destination).
   uint64_t NumColdSamples{0};
 
   /// Looks into system PATH for Linux Perf and set up the aggregator to use it

>From 46f3d30d3d51a0982c78d19aa308666ced7abecd Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Fri, 5 Apr 2024 15:05:11 -0700
Subject: [PATCH 4/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
 =?UTF-8?q?anges=20introduced=20through=20rebase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 bolt/lib/Profile/DataAggregator.cpp           | 37 +++++++++++-
 bolt/lib/Profile/YAMLProfileWriter.cpp        |  1 -
 .../X86/yaml-secondary-entry-discriminator.s  | 56 +------------------
 3 files changed, 37 insertions(+), 57 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index cd45b352315d79..4b87fdb452abcc 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2372,9 +2372,40 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
             YamlCSI.Count = BI.Branches;
             YamlCSI.Mispreds = BI.Mispreds;
             YamlCSI.Offset = BranchOffset - Offset;
-            if (BinaryData *BD = BC.getBinaryDataByName(CallToLoc.Name))
-              YAMLProfileWriter::setCSIDestination(BC, YamlCSI, BD->getSymbol(),
-                                                   BAT, CallToLoc.Offset);
+            BinaryData *CallTargetBD = BC.getBinaryDataByName(CallToLoc.Name);
+            if (!CallTargetBD) {
+              YamlBB.CallSites.emplace_back(YamlCSI);
+              continue;
+            }
+            uint64_t CallTargetAddress = CallTargetBD->getAddress();
+            BinaryFunction *CallTargetBF =
+                BC.getBinaryFunctionAtAddress(CallTargetAddress);
+            if (!CallTargetBF) {
+              YamlBB.CallSites.emplace_back(YamlCSI);
+              continue;
+            }
+            // Calls between hot and cold fragments must be handled in
+            // fixupBATProfile.
+            assert(CallTargetBF != BF && "invalid CallTargetBF");
+            YamlCSI.DestId = CallTargetBF->getFunctionNumber();
+            if (CallToLoc.Offset) {
+              if (BAT->isBATFunction(CallTargetAddress)) {
+                LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Unsupported secondary "
+                                     "entry point in BAT function "
+                                  << CallToLoc.Name << '\n');
+              } else if (const BinaryBasicBlock *CallTargetBB =
+                             CallTargetBF->getBasicBlockAtOffset(
+                                 CallToLoc.Offset)) {
+                // Only record true call information, ignoring returns (normally
+                // won't have a target basic block) and jumps to the landing
+                // pads (not an entry point).
+                if (CallTargetBB->isEntryPoint()) {
+                  YamlCSI.EntryDiscriminator =
+                      CallTargetBF->getEntryIDForSymbol(
+                          CallTargetBB->getLabel());
+                }
+              }
+            }
             YamlBB.CallSites.emplace_back(YamlCSI);
           }
         }
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index f1cdfc09a8a2f7..ef04ba0d21ad75 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -13,7 +13,6 @@
 #include "bolt/Profile/ProfileReaderBase.h"
 #include "bolt/Rewrite/RewriteInstance.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 
diff --git a/bolt/test/X86/yaml-secondary-entry-discriminator.s b/bolt/test/X86/yaml-secondary-entry-discriminator.s
index 35e9a079ae0def..43c2e2a7f05549 100644
--- a/bolt/test/X86/yaml-secondary-entry-discriminator.s
+++ b/bolt/test/X86/yaml-secondary-entry-discriminator.s
@@ -11,17 +11,17 @@
 # RUN: FileCheck %s -input-file %t.yaml
 # CHECK:      - name:    main
 # CHECK-NEXT:   fid:     2
-# CHECK-NEXT:   hash:    {{.*}}
+# CHECK-NEXT:   hash:    0xADF270D550151185
 # CHECK-NEXT:   exec:    0
 # CHECK-NEXT:   nblocks: 4
 # CHECK-NEXT:   blocks:
 # CHECK:          - bid:   1
 # CHECK-NEXT:       insns: 1
-# CHECK-NEXT:       hash:  {{.*}}
+# CHECK-NEXT:       hash:  0x36A303CBA4360014
 # CHECK-NEXT:       calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1 } ]
 # CHECK:          - bid:   2
 # CHECK-NEXT:       insns: 5
-# CHECK-NEXT:       hash:  {{.*}}
+# CHECK-NEXT:       hash:  0x8B2F5747CD0019
 # CHECK-NEXT:       calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1, mis: 1 } ]
 
 # Make sure that the profile is attached correctly
@@ -33,61 +33,15 @@
 # CHECK-CFG:      callq *%rax # Offset: [[#]] # CallProfile: 1 (1 misses) :
 # CHECK-CFG-NEXT:     { secondary_entry: 1 (1 misses) }
 
-# YAML BAT test of calling BAT secondary entry from non-BAT function
-# Now force-split func and skip main (making it call secondary entries)
-# RUN: llvm-bolt %t.exe -o %t.bat --data %t.fdata --funcs=func \
-# RUN:   --split-functions --split-strategy=all --split-all-cold --enable-bat
-
-# Prepare pre-aggregated profile using %t.bat
-# RUN: link_fdata %s %t.bat %t.preagg PREAGG
-# Strip labels used for pre-aggregated profile
-# RUN: llvm-strip -NLcall -NLindcall %t.bat
-
-# Convert pre-aggregated profile using BAT
-# RUN: perf2bolt %t.bat -p %t.preagg --pa -o %t.bat.fdata -w %t.bat.yaml
-
-# Convert BAT fdata into YAML
-# RUN: llvm-bolt %t.exe -data %t.bat.fdata -w %t.bat.fdata-yaml -o /dev/null
-
-# Check fdata YAML - make sure that a direct call has discriminator field
-# RUN: FileCheck %s --input-file %t.bat.fdata-yaml -check-prefix CHECK-BAT-YAML
-
-# Check BAT YAML - make sure that a direct call has discriminator field
-# RUN: FileCheck %s --input-file %t.bat.yaml --check-prefix CHECK-BAT-YAML
-
-# YAML BAT test of calling BAT secondary entry from BAT function
-# RUN: llvm-bolt %t.exe -o %t.bat2 --data %t.fdata --funcs=main,func \
-# RUN:   --split-functions --split-strategy=all --split-all-cold --enable-bat
-
-# CHECK-BAT-YAML:      - name:    main
-# CHECK-BAT-YAML-NEXT:   fid:     2
-# CHECK-BAT-YAML-NEXT:   hash:    0xADF270D550151185
-# CHECK-BAT-YAML-NEXT:   exec:    0
-# CHECK-BAT-YAML-NEXT:   nblocks: 4
-# CHECK-BAT-YAML-NEXT:   blocks:
-# CHECK-BAT-YAML:          - bid:   1
-# CHECK-BAT-YAML-NEXT:       insns: [[#]]
-# CHECK-BAT-YAML-NEXT:       hash:  0x36A303CBA4360018
-# CHECK-BAT-YAML-NEXT:       calls: [ { off: 0x0, fid: [[#]], disc: 1, cnt: 1 } ]
-
 .globl func
 .type	func, @function
 func:
 # FDATA: 0 [unknown] 0 1 func 0 1 0
-# PREAGG: B X:0 #func# 1 1
   .cfi_startproc
   pushq   %rbp
   movq    %rsp, %rbp
-  # Placeholder code to make splitting profitable
-.rept 5
-  testq   %rax, %rax
-.endr
 .globl secondary_entry
 secondary_entry:
-  # Placeholder code to make splitting profitable
-.rept 5
-  testq   %rax, %rax
-.endr
   popq    %rbp
   retq
   nopl    (%rax)
@@ -104,16 +58,12 @@ main:
   movl    $0, -4(%rbp)
   testq   %rax, %rax
   jne     Lindcall
-.globl Lcall
 Lcall:
   call    secondary_entry
 # FDATA: 1 main #Lcall# 1 secondary_entry 0 1 1
-# PREAGG: B #Lcall# #secondary_entry# 1 1
-.globl Lindcall
 Lindcall:
   callq   *%rax
 # FDATA: 1 main #Lindcall# 1 secondary_entry 0 1 1
-# PREAGG: B #Lindcall# #secondary_entry# 1 1
   xorl    %eax, %eax
   addq    $16, %rsp
   popq    %rbp



More information about the llvm-commits mailing list