[llvm] a6f15e9 - [CSSPGO] Use probe inline tree to track zero size fully optimized context for pre-inliner

Wenlei He via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 25 09:01:20 PDT 2021


Author: Wenlei He
Date: 2021-08-25T09:01:11-07:00
New Revision: a6f15e9a49a1a5bcd1ce7f30b1201d977d8a8ede

URL: https://github.com/llvm/llvm-project/commit/a6f15e9a49a1a5bcd1ce7f30b1201d977d8a8ede
DIFF: https://github.com/llvm/llvm-project/commit/a6f15e9a49a1a5bcd1ce7f30b1201d977d8a8ede.diff

LOG: [CSSPGO] Use probe inline tree to track zero size fully optimized context for pre-inliner

This is a follow-up diff for BinarySizeContextTracker to track zero size for fully optimized inlinees. When an inlinee is fully optimized away, we can't get its size by symbolizing instructions, so the corresponding context size is treated as unknown. However, by traversing the inlined probe forest, we know what the original inlinees were regardless of optimization. If a context shows up in inlined probes but not during symbolization, we know that it was fully optimized away, hence its size is zero instead of unknown. This should provide a more accurate size cost estimate for the pre-inliner, so it can make better inline decisions in llvm-profgen.

Differential Revision: https://reviews.llvm.org/D108350

Added: 
    

Modified: 
    llvm/include/llvm/MC/MCPseudoProbe.h
    llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
    llvm/lib/Transforms/IPO/SampleContextTracker.cpp
    llvm/test/tools/llvm-profgen/cs-preinline-cost.test
    llvm/tools/llvm-profgen/ProfiledBinary.cpp
    llvm/tools/llvm-profgen/ProfiledBinary.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index 76cda75d6878f..3457a1bd302a1 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -246,9 +246,8 @@ class MCPseudoProbeInlineTreeBase {
 // A Tri-tree based data structure to group probes by inline stack.
 // A tree is allocated for a standalone .text section. A fake
 // instance is created as the root of a tree.
-// A real instance of this class is created for each function, either an
-// unlined function that has code in .text section or an inlined function.
-
+// A real instance of this class is created for each function, either a
+// not inlined function that has code in .text section or an inlined function.
 class MCPseudoProbeInlineTree
     : public MCPseudoProbeInlineTreeBase<MCPseudoProbe,
                                          MCPseudoProbeInlineTree> {

diff  --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
index d280d83088c34..c246bd9a375d1 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
@@ -37,10 +37,10 @@ class ContextTrieNode {
 public:
   ContextTrieNode(ContextTrieNode *Parent = nullptr,
                   StringRef FName = StringRef(),
-                  FunctionSamples *FSamples = nullptr, uint32_t FSize = 0,
+                  FunctionSamples *FSamples = nullptr,
                   LineLocation CallLoc = {0, 0})
       : ParentContext(Parent), FuncName(FName), FuncSamples(FSamples),
-        FuncSize(FSize), CallSiteLoc(CallLoc){};
+        CallSiteLoc(CallLoc){};
   ContextTrieNode *getChildContext(const LineLocation &CallSite,
                                    StringRef ChildName);
   ContextTrieNode *getHottestChildContext(const LineLocation &CallSite);
@@ -57,8 +57,8 @@ class ContextTrieNode {
   StringRef getFuncName() const;
   FunctionSamples *getFunctionSamples() const;
   void setFunctionSamples(FunctionSamples *FSamples);
-  uint32_t getFunctionSize() const;
-  void setFunctionSize(uint32_t FSize);
+  Optional<uint32_t> getFunctionSize() const;
+  void addFunctionSize(uint32_t FSize);
   LineLocation getCallSiteLoc() const;
   ContextTrieNode *getParentContext() const;
   void setParentContext(ContextTrieNode *Parent);
@@ -81,7 +81,7 @@ class ContextTrieNode {
   FunctionSamples *FuncSamples;
 
   // Function size for current context
-  uint32_t FuncSize;
+  Optional<uint32_t> FuncSize;
 
   // Callsite location in parent context
   LineLocation CallSiteLoc;

diff  --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 21b5d764221f2..c4734be82d955 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -127,9 +127,14 @@ void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) {
   FuncSamples = FSamples;
 }
 
-uint32_t ContextTrieNode::getFunctionSize() const { return FuncSize; }
+Optional<uint32_t> ContextTrieNode::getFunctionSize() const { return FuncSize; }
 
-void ContextTrieNode::setFunctionSize(uint32_t FSize) { FuncSize = FSize; }
+void ContextTrieNode::addFunctionSize(uint32_t FSize) {
+  if (!FuncSize.hasValue())
+    FuncSize = 0;
+
+  FuncSize = FuncSize.getValue() + FSize;
+}
 
 LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; }
 
@@ -193,8 +198,7 @@ ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
   if (!AllowCreate)
     return nullptr;
 
-  AllChildContext[Hash] =
-      ContextTrieNode(this, CalleeName, nullptr, 0, CallSite);
+  AllChildContext[Hash] = ContextTrieNode(this, CalleeName, nullptr, CallSite);
   return &AllChildContext[Hash];
 }
 

diff  --git a/llvm/test/tools/llvm-profgen/cs-preinline-cost.test b/llvm/test/tools/llvm-profgen/cs-preinline-cost.test
index 572dce22390ff..f473ca3607529 100644
--- a/llvm/test/tools/llvm-profgen/cs-preinline-cost.test
+++ b/llvm/test/tools/llvm-profgen/cs-preinline-cost.test
@@ -11,7 +11,9 @@ CHECK-DEFAULT-NEXT:   Inlined context profile for: main:7 @ _Z3fooi (callee size
 CHECK-DEFAULT-NEXT:   Inlined context profile for: main:8 @ _Z3fooi (callee size: 4, call count:544)
 
 CHECK-CSCOST:      Process main for context-sensitive pre-inlining (pre-inline size: 69, size limit: 828)
-CHECK-CSCOST-NEXT:   Inlined context profile for: main:9 @ _Z3fooi (callee size: 264, call count:545)
+; This inlinee is fully optimized away, make sure we have the correct zero size for that context even if the size is
+; not available through symbolization.
+CHECK-CSCOST-NEXT:   Inlined context profile for: main:9 @ _Z3fooi (callee size: 0, call count:545)
 CHECK-CSCOST-NEXT:   Inlined context profile for: main:7 @ _Z3fooi (callee size: 279, call count:545)
 CHECK-CSCOST-NEXT:   Inlined context profile for: main:8 @ _Z3fooi (callee size: 44, call count:544)
 

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index edc539f29f2d5..1360b920a9cd7 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -63,7 +63,7 @@ void BinarySizeContextTracker::addInstructionForContext(
     IsLeaf = false;
   }
 
-  CurNode->setFunctionSize(CurNode->getFunctionSize() + InstrSize);
+  CurNode->addFunctionSize(InstrSize);
 }
 
 uint32_t
@@ -73,7 +73,7 @@ BinarySizeContextTracker::getFuncSizeForContext(const SampleContext &Context) {
   StringRef ContextRemain = Context;
   StringRef ChildContext;
   StringRef CallerName;
-  uint32_t Size = 0;
+  Optional<uint32_t> Size;
 
   // Start from top-level context-less function, travese down the reverse
   // context trie to find the best/longest match for given context, then
@@ -87,23 +87,64 @@ BinarySizeContextTracker::getFuncSizeForContext(const SampleContext &Context) {
     SampleContext::decodeContextString(ChildContext, CallerName, CallSiteLoc);
     PrevNode = CurrNode;
     CurrNode = CurrNode->getChildContext(CallSiteLoc, CallerName);
-    if (CurrNode && CurrNode->getFunctionSize())
-      Size = CurrNode->getFunctionSize();
+    if (CurrNode && CurrNode->getFunctionSize().hasValue())
+      Size = CurrNode->getFunctionSize().getValue();
   }
 
   // If we traversed all nodes along the path of the context and haven't
   // found a size yet, pivot to look for size from sibling nodes, i.e size
   // of inlinee under different context.
-  if (!Size) {
+  if (!Size.hasValue()) {
     if (!CurrNode)
       CurrNode = PrevNode;
-    while (!Size && CurrNode) {
+    while (!Size.hasValue() && CurrNode &&
+           !CurrNode->getAllChildContext().empty()) {
       CurrNode = &CurrNode->getAllChildContext().begin()->second;
-      Size = CurrNode->getFunctionSize();
+      if (CurrNode->getFunctionSize().hasValue())
+        Size = CurrNode->getFunctionSize().getValue();
     }
   }
 
-  return Size;
+  assert(Size.hasValue() && "We should at least find one context size.");
+  return Size.getValue();
+}
+
+void BinarySizeContextTracker::trackInlineesOptimizedAway(
+    MCPseudoProbeDecoder &ProbeDecoder) {
+  ProbeFrameStack ProbeContext;
+  for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
+    trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext);
+}
+
+void BinarySizeContextTracker::trackInlineesOptimizedAway(
+    MCPseudoProbeDecoder &ProbeDecoder,
+    MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) {
+  StringRef FuncName =
+      ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName;
+  ProbeContext.emplace_back(FuncName, 0);
+
+  // This ProbeContext has a probe, so it has code before inlining and
+  // optimization. Make sure we mark its size as known.
+  if (!ProbeNode.getProbes().empty()) {
+    ContextTrieNode *SizeContext = &RootContext;
+    for (auto &ProbeFrame : reverse(ProbeContext)) {
+      StringRef CallerName = ProbeFrame.first;
+      LineLocation CallsiteLoc(ProbeFrame.second, 0);
+      SizeContext =
+          SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName);
+    }
+    // Add 0 size to make known.
+    SizeContext->addFunctionSize(0);
+  }
+
+  // DFS down the probe inline tree
+  for (const auto &ChildNode : ProbeNode.getChildren()) {
+    InlineSite Location = ChildNode.first;
+    ProbeContext.back().second = std::get<1>(Location);
+    trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), ProbeContext);
+  }
+
+  ProbeContext.pop_back();
 }
 
 void ProfiledBinary::load() {
@@ -130,6 +171,10 @@ void ProfiledBinary::load() {
   // Disassemble the text sections.
   disassemble(Obj);
 
+  // Track size for optimized inlinees when probe is available
+  if (UsePseudoProbes && TrackFuncContextSize)
+    FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder);
+
   // Use function start and return address to infer prolog and epilog
   ProEpilogTracker.inferPrologOffsets(FuncStartAddrMap);
   ProEpilogTracker.inferEpilogOffsets(RetAddrs);

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 9a924e6bd5754..5e5ff9fc408b4 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -118,9 +118,18 @@ class BinarySizeContextTracker {
   // closest matching context.
   uint32_t getFuncSizeForContext(const SampleContext &Context);
 
+  // For inlinees that are full optimized away, we can establish zero size using
+  // their remaining probes.
+  void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder);
+
   void dump() { RootContext.dumpTree(); }
 
 private:
+  using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
+  void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
+                              MCDecodedPseudoProbeInlineTree &ProbeNode,
+                              ProbeFrameStack &Context);
+
   // Root node for context trie tree, node that this is a reverse context trie
   // with callee as parent and caller as child. This way we can traverse from
   // root to find the best/longest matching context if an exact match does not


        


More information about the llvm-commits mailing list