[llvm] 091c16f - [llvm-profgen] On-demand symbolization

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 28 09:10:10 PDT 2021


Author: wlei
Date: 2021-09-28T09:09:25-07:00
New Revision: 091c16f76ba1e6341afd717445323f8396b7772f

URL: https://github.com/llvm/llvm-project/commit/091c16f76ba1e6341afd717445323f8396b7772f
DIFF: https://github.com/llvm/llvm-project/commit/091c16f76ba1e6341afd717445323f8396b7772f.diff

LOG: [llvm-profgen] On-demand symbolization

Previously we symbolized all functions up front, even though we only need the symbols that are actually hit by the samples.

Symbolizing on demand can significantly reduce processing time for large binaries.

The corresponding optimization for the pre-inliner will come in the next patch.

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D110465

Added: 
    

Modified: 
    llvm/tools/llvm-profgen/PerfReader.h
    llvm/tools/llvm-profgen/ProfiledBinary.cpp
    llvm/tools/llvm-profgen/ProfiledBinary.h

Removed: 
    


################################################################################
diff  --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index 25fa44284734d..1a8a2a3014a98 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -388,8 +388,8 @@ using ContextSampleCounterMap =
 
 struct FrameStack {
   SmallVector<uint64_t, 16> Stack;
-  const ProfiledBinary *Binary;
-  FrameStack(const ProfiledBinary *B) : Binary(B) {}
+  ProfiledBinary *Binary;
+  FrameStack(ProfiledBinary *B) : Binary(B) {}
   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
     Stack.push_back(Cur->Address);
     return true;
@@ -404,8 +404,8 @@ struct FrameStack {
 
 struct ProbeStack {
   SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
-  const ProfiledBinary *Binary;
-  ProbeStack(const ProfiledBinary *B) : Binary(B) {}
+  ProfiledBinary *Binary;
+  ProbeStack(ProfiledBinary *B) : Binary(B) {}
   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
     const MCDecodedPseudoProbe *CallProbe =
         Binary->getCallProbeForAddr(Cur->Address);
@@ -458,7 +458,7 @@ range as sample counter for further CS profile generation.
 */
 class VirtualUnwinder {
 public:
-  VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
+  VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
       : CtxCounterMap(Counter), Binary(B) {}
   bool unwind(const PerfSample *Sample, uint64_t Repeat);
   std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
@@ -495,7 +495,7 @@ class VirtualUnwinder {
 
   ContextSampleCounterMap *CtxCounterMap;
   // Profiled binary that current frame address belongs to
-  const ProfiledBinary *Binary;
+  ProfiledBinary *Binary;
   // Keep track of all untracked callsites
   std::set<uint64_t> UntrackedCallsites;
 };

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index b04150842fb0e..1e8da15500059 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -184,8 +184,7 @@ void ProfiledBinary::load() {
   // TODO: decode other sections.
 }
 
-bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
-                                        uint64_t Address2) const {
+bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
   uint64_t Offset1 = virtualAddrToOffset(Address1);
   uint64_t Offset2 = virtualAddrToOffset(Address2);
   const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1);
@@ -202,7 +201,7 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
 
 SampleContextFrameVector
 ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
-                                   bool &WasLeafInlined) const {
+                                   bool &WasLeafInlined) {
   SampleContextFrameVector ContextVec;
   // Process from frame root to leaf
   for (auto Address : Stack) {
@@ -358,7 +357,7 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
       // We don't need symbolized info for probe-based profile, just use an
       // empty stack as an entry to indicate a valid binary offset
       SampleContextFrameVector SymbolizedCallStack;
-      if (!UsePseudoProbes || TrackFuncContextSize) {
+      if (TrackFuncContextSize) {
         InstructionPointer IP(this, Offset);
         // TODO: reallocation of Offset2LocStackMap will lead to dangling
         // strings We need ProfiledBinary to owned these string.
@@ -369,9 +368,9 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
         if (TrackFuncContextSize && !SymbolizedCallStack.empty())
           FuncSizeTracker.addInstructionForContext(Offset2LocStackMap[Offset],
                                                    Size);
-      } else {
-        Offset2LocStackMap[Offset] = SampleContextFrameVector();
       }
+      // Record instruction size.
+      Offset2InstSizeMap[Offset] = Size;
 
       // Populate address maps.
       CodeAddrs.push_back(Offset);

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 01962d9d38e5b..fd719ef443e64 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -169,6 +169,10 @@ class ProfiledBinary {
   std::map<uint64_t, std::pair<std::string, uint64_t>> FuncStartOffsetMap;
   // Offset to context location map. Used to expand the context.
   std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;
+
+  // Offset to instruction size map. Also used for quick offset lookup.
+  std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap;
+
   // An array of offsets of all instructions sorted in increasing order. The
   // sorting is needed to fast advance to the next forward/backward instruction.
   std::vector<uint64_t> CodeAddrs;
@@ -269,7 +273,7 @@ class ProfiledBinary {
 
   bool addressIsCode(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
-    return Offset2LocStackMap.find(Offset) != Offset2LocStackMap.end();
+    return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
   }
   bool addressIsCall(uint64_t Address) const {
     uint64_t Offset = virtualAddrToOffset(Address);
@@ -326,11 +330,14 @@ class ProfiledBinary {
     return FuncSizeTracker.getFuncSizeForContext(Context);
   }
 
-  const SampleContextFrameVector &getFrameLocationStack(uint64_t Offset) const {
-    auto I = Offset2LocStackMap.find(Offset);
-    assert(I != Offset2LocStackMap.end() &&
-           "Can't find location for offset in the binary");
-    return I->second;
+  const SampleContextFrameVector &
+  getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) {
+    auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector());
+    if (I.second) {
+      InstructionPointer IP(this, Offset);
+      I.first->second = symbolize(IP, true, UseProbeDiscriminator);
+    }
+    return I.first->second;
   }
 
   Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) {
@@ -341,14 +348,14 @@ class ProfiledBinary {
   }
 
   // Compare two addresses' inline context
-  bool inlineContextEqual(uint64_t Add1, uint64_t Add2) const;
+  bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
 
   // Get the full context of the current stack with inline context filled in.
   // It will search the disassembling info stored in Offset2LocStackMap. This is
   // used as the key of function sample map
   SampleContextFrameVector
   getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
-                     bool &WasLeafInlined) const;
+                     bool &WasLeafInlined);
 
   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
     return ProbeDecoder.getCallProbeForAddr(Address);


        


More information about the llvm-commits mailing list