[llvm] ce40843 - [llvm-profgen][CSSPGO] On-demand function size computation for preinliner

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 28 09:10:12 PDT 2021


Author: wlei
Date: 2021-09-28T09:09:38-07:00
New Revision: ce40843a3fe120621bb6e4aa07dc9cbf76b6aa0e

URL: https://github.com/llvm/llvm-project/commit/ce40843a3fe120621bb6e4aa07dc9cbf76b6aa0e
DIFF: https://github.com/llvm/llvm-project/commit/ce40843a3fe120621bb6e4aa07dc9cbf76b6aa0e.diff

LOG: [llvm-profgen][CSSPGO] On-demand function size computation for preinliner

Similar to https://reviews.llvm.org/D110465, we can compute function size on-demand for the functions that's hit by samples.

Here we leverage the raw range samples' address to compute a set of sample hit function. Then `BinarySizeContextTracker` just works on those function range for the size.

Reviewed By: hoy

Differential Revision: https://reviews.llvm.org/D110466

Added: 
    

Modified: 
    llvm/tools/llvm-profgen/ProfileGenerator.cpp
    llvm/tools/llvm-profgen/ProfileGenerator.h
    llvm/tools/llvm-profgen/ProfiledBinary.cpp
    llvm/tools/llvm-profgen/ProfiledBinary.h

Removed: 
    


################################################################################
diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 77aff9d9967a7..05e8e4dc78a0d 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -425,6 +425,10 @@ FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
 
 void CSProfileGenerator::generateProfile() {
   FunctionSamples::ProfileIsCS = true;
+
+  if (Binary->getTrackFuncContextSize())
+    computeSizeForProfiledFunctions();
+
   if (Binary->usePseudoProbes()) {
     // Enable pseudo probe functionalities in SampleProf
     FunctionSamples::ProfileIsProbeBased = true;
@@ -435,6 +439,29 @@ void CSProfileGenerator::generateProfile() {
   postProcessProfiles();
 }
 
+void CSProfileGenerator::computeSizeForProfiledFunctions() {
+  // Hash map to deduplicate the function range and the item is a pair of
+  // function start and end offset.
+  std::unordered_map<uint64_t, uint64_t> FuncRanges;
+  // Go through all the ranges in the CS counters, use the start of the range to
+  // look up the function it belongs and record the function range.
+  for (const auto &CI : SampleCounters) {
+    for (auto Item : CI.second.RangeCounter) {
+      // FIXME: Filter the bogus crossing function range.
+      uint64_t RangeStartOffset = Item.first.first;
+      auto FuncRange = Binary->findFuncOffsetRange(RangeStartOffset);
+      if (FuncRange.second != 0)
+        FuncRanges[FuncRange.first] = FuncRange.second;
+    }
+  }
+
+  for (auto I : FuncRanges) {
+    uint64_t StartOffset = I.first;
+    uint64_t EndOffset = I.second;
+    Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
+  }
+}
+
 void CSProfileGenerator::generateLineNumBasedProfile() {
   for (const auto &CI : SampleCounters) {
     const StringBasedCtxKey *CtxKey =

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 64f469be363f0..86e2d68f216e4 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -228,6 +228,9 @@ class CSProfileGenerator : public ProfileGeneratorBase {
   FunctionSamples &
   getFunctionProfileForContext(const SampleContextFrameVector &Context,
                                bool WasLeafInlined = false);
+  // For profiled only functions, on-demand compute their inline context
+  // function byte size which is used by the pre-inliner.
+  void computeSizeForProfiledFunctions();
   // Post processing for profiles before writing out, such as mermining
   // and trimming cold profiles, running preinliner on profiles.
   void postProcessProfiles();

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 1e8da15500059..2d1f68ff77ec5 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -293,10 +293,10 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
   uint64_t SectionOffset = Section.getAddress() - getPreferredBaseAddress();
   uint64_t SectSize = Section.getSize();
   uint64_t StartOffset = Symbols[SI].Addr - getPreferredBaseAddress();
-  uint64_t EndOffset = (SI + 1 < SE)
-                           ? Symbols[SI + 1].Addr - getPreferredBaseAddress()
-                           : SectionOffset + SectSize;
-  if (StartOffset >= EndOffset)
+  uint64_t NextStartOffset =
+      (SI + 1 < SE) ? Symbols[SI + 1].Addr - getPreferredBaseAddress()
+                    : SectionOffset + SectSize;
+  if (StartOffset >= NextStartOffset)
     return true;
 
   StringRef SymbolName =
@@ -316,10 +316,11 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
   };
 
   uint64_t Offset = StartOffset;
+  uint64_t EndOffset = 0;
   // Size of a consecutive invalid instruction range starting from Offset -1
   // backwards.
   uint64_t InvalidInstLength = 0;
-  while (Offset < EndOffset) {
+  while (Offset < NextStartOffset) {
     MCInst Inst;
     uint64_t Size;
     // Disassemble an instruction.
@@ -353,32 +354,19 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
 
     if (Disassembled) {
       const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
-      // Populate a vector of the symbolized callsite at this location
-      // We don't need symbolized info for probe-based profile, just use an
-      // empty stack as an entry to indicate a valid binary offset
-      SampleContextFrameVector SymbolizedCallStack;
-      if (TrackFuncContextSize) {
-        InstructionPointer IP(this, Offset);
-        // TODO: reallocation of Offset2LocStackMap will lead to dangling
-        // strings We need ProfiledBinary to owned these string.
-        Offset2LocStackMap[Offset] = symbolize(IP, true, UsePseudoProbes);
-        SampleContextFrameVector &SymbolizedCallStack =
-            Offset2LocStackMap[Offset];
-        // Record instruction size for the corresponding context
-        if (TrackFuncContextSize && !SymbolizedCallStack.empty())
-          FuncSizeTracker.addInstructionForContext(Offset2LocStackMap[Offset],
-                                                   Size);
-      }
+
       // Record instruction size.
       Offset2InstSizeMap[Offset] = Size;
 
       // Populate address maps.
-      CodeAddrs.push_back(Offset);
+      CodeAddrOffsets.push_back(Offset);
       if (MCDesc.isCall())
         CallAddrs.insert(Offset);
       else if (MCDesc.isReturn())
         RetAddrs.insert(Offset);
 
+      EndOffset = Offset;
+
       if (InvalidInstLength) {
         WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
         InvalidInstLength = 0;
@@ -560,6 +548,26 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
   return CallStack;
 }
 
+void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
+                                                       uint64_t EndOffset) {
+  uint32_t Index = getIndexForOffset(StartOffset);
+  if (CodeAddrOffsets[Index] != StartOffset)
+    WithColor::warning() << "Invalid start instruction at "
+                         << format("%8" PRIx64, StartOffset) << "\n";
+
+  uint64_t Offset = CodeAddrOffsets[Index];
+  while (Offset <= EndOffset) {
+    const SampleContextFrameVector &SymbolizedCallStack =
+        getFrameLocationStack(Offset, UsePseudoProbes);
+    uint64_t Size = Offset2InstSizeMap[Offset];
+
+    // Record instruction size for the corresponding context
+    FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
+
+    Offset = CodeAddrOffsets[++Index];
+  }
+}
+
 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
                                        uint64_t Address, bool RoundToNext)
     : Binary(Binary), Address(Address) {

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index fd719ef443e64..64759cad8fa92 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -175,7 +175,7 @@ class ProfiledBinary {
 
   // An array of offsets of all instructions sorted in increasing order. The
   // sorting is needed to fast advance to the next forward/backward instruction.
-  std::vector<uint64_t> CodeAddrs;
+  std::vector<uint64_t> CodeAddrOffsets;
   // A set of call instruction offsets. Used by virtual unwinding.
   std::unordered_set<uint64_t> CallAddrs;
   // A set of return instruction offsets. Used by virtual unwinding.
@@ -231,7 +231,6 @@ class ProfiledBinary {
   SampleContextFrameVector symbolize(const InstructionPointer &IP,
                                      bool UseCanonicalFnName = false,
                                      bool UseProbeDiscriminator = false);
-
   /// Decode the interesting parts of the binary and build internal data
   /// structures. On high level, the parts of interest are:
   ///   1. Text sections, including the main code section and the PLT
@@ -289,18 +288,21 @@ class ProfiledBinary {
   }
 
   uint64_t getAddressforIndex(uint64_t Index) const {
-    return offsetToVirtualAddr(CodeAddrs[Index]);
+    return offsetToVirtualAddr(CodeAddrOffsets[Index]);
   }
 
   bool usePseudoProbes() const { return UsePseudoProbes; }
-  // Get the index in CodeAddrs for the address
+  // Get the index in CodeAddrOffsets for the address
   // As we might get an address which is not the code
   // here it would round to the next valid code address by
   // using lower bound operation
+  uint32_t getIndexForOffset(uint64_t Offset) const {
+    auto Low = llvm::lower_bound(CodeAddrOffsets, Offset);
+    return Low - CodeAddrOffsets.begin();
+  }
   uint32_t getIndexForAddr(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
-    auto Low = llvm::lower_bound(CodeAddrs, Offset);
-    return Low - CodeAddrs.begin();
+    return getIndexForOffset(Offset);
   }
 
   uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
@@ -356,6 +358,10 @@ class ProfiledBinary {
   SampleContextFrameVector
   getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
                      bool &WasLeafInlined);
+  // Go through instructions among the given range and record its size for the
+  // inline context.
+  void computeInlinedContextSizeForRange(uint64_t StartOffset,
+                                         uint64_t EndOffset);
 
   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
     return ProbeDecoder.getCallProbeForAddr(Address);
@@ -384,6 +390,8 @@ class ProfiledBinary {
     return ProbeDecoder.getInlinerDescForProbe(Probe);
   }
 
+  bool getTrackFuncContextSize() { return TrackFuncContextSize; }
+
   bool getIsLoadedByMMap() { return IsLoadedByMMap; }
 
   void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; }


        


More information about the llvm-commits mailing list