[llvm] 23391fe - [llvm-profgen] Generating probe-based non-CS profile.

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 1 18:49:17 PST 2022


Author: Hongtao Yu
Date: 2022-03-01T18:49:08-08:00
New Revision: 23391febd87779f1ed57ae6bb384ab00367969c1

URL: https://github.com/llvm/llvm-project/commit/23391febd87779f1ed57ae6bb384ab00367969c1
DIFF: https://github.com/llvm/llvm-project/commit/23391febd87779f1ed57ae6bb384ab00367969c1.diff

LOG: [llvm-profgen] Generating probe-based non-CS profile.

I'm bringing up support for pseudo-probe-based non-CS profile generation. The approach is quite similar to generating a DWARF-based non-CS profile. The main difference is that, for a given linear instruction range, the pseudo probes covered by the range are processed instead of each disassembled instruction. The pseudo-probe extraction code is shared with CS probe profile generation.

I'm seeing a 0.7% performance win for one of our large internal benchmarks compared to using a non-CS DWARF-based profile, and a 0.5% win for another large benchmark when combined with profi.

Reviewed By: wenlei

Differential Revision: https://reviews.llvm.org/D120335

Added: 
    llvm/test/tools/llvm-profgen/inline-pseudoprobe.test
    llvm/test/tools/llvm-profgen/noinline-pseudoprobe.test

Modified: 
    llvm/tools/llvm-profgen/ProfileGenerator.cpp
    llvm/tools/llvm-profgen/ProfileGenerator.h

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test
new file mode 100644
index 0000000000000..87c77f2512100
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test
@@ -0,0 +1,46 @@
+; RUN: llvm-profgen --format=text --ignore-stack-samples --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0
+; RUN: FileCheck %s --input-file %t
+
+; CHECK:     main:88:0
+; CHECK-NEXT: 1: 0
+; CHECK-NEXT: 2: foo:88
+; CHECK-NEXT:  1: 0
+; CHECK-NEXT:  2: 15
+; CHECK-NEXT:  3: 15
+; CHECK-NEXT:  4: 14
+; CHECK-NEXT:  5: 1
+; CHECK-NEXT:  6: 15
+; CHECK-NEXT:  7: 0
+; CHECK-NEXT:  9: 0
+; CHECK-NEXT:  8: bar:28
+; CHECK-NEXT:   1: 14
+; CHECK-NEXT:   4: 14
+; CHECK-NEXT:   !CFGChecksum: 72617220756
+; CHECK-NEXT:  !CFGChecksum: 563088904013236
+; CHECK-NEXT: !CFGChecksum: 281479271677951
+
+
+; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
+; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
+; -g test.c  -o a.out
+
+#include <stdio.h>
+
+int bar(int x, int y) {
+  if (x % 3) {
+    return x - y;
+  }
+  return x + y;
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 4000 * 4000)
+    if (i % 91) s = bar(i, s); else s += 30;
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}

diff  --git a/llvm/test/tools/llvm-profgen/noinline-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-pseudoprobe.test
new file mode 100644
index 0000000000000..79c395b1ef6c2
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/noinline-pseudoprobe.test
@@ -0,0 +1,48 @@
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t1 --ignore-stack-samples
+; RUN: FileCheck %s --input-file %t1
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t2 --ignore-stack-samples
+; RUN: FileCheck %s --input-file %t2
+
+
+; CHECK:     foo:75:0
+; CHECK-NEXT:  1: 0
+; CHECK-NEXT:  2: 15
+; CHECK-NEXT:  3: 15
+; CHECK-NEXT:  4: 15
+; CHECK-NEXT:  5: 0
+; CHECK-NEXT:  6: 15
+; CHECK-NEXT:  7: 0
+; CHECK-NEXT:  8: 15 bar:15
+; CHECK-NEXT:  9: 0
+; CHECK-NEXT:  !CFGChecksum: 563088904013236
+; CHECK-NEXT: bar:30:15
+; CHECK-NEXT:  1: 15
+; CHECK-NEXT:  4: 15
+; CHECK-NEXT:  !CFGChecksum: 72617220756
+
+
+
+; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
+; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
+; -fno-inline-functions -g test.c  -o a.out
+
+#include <stdio.h>
+
+int bar(int x, int y) {
+  if (x % 3) {
+    return x - y;
+  }
+  return x + y;
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 4000 * 4000)
+    if (i % 91) s = bar(i, s); else s += 30;
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 42c8f3b12e2c8..61e5646b66bfa 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -383,9 +383,7 @@ ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
 
 void ProfileGenerator::generateProfile() {
   if (Binary->usePseudoProbes()) {
-    // TODO: Support probe based profile generation
-    exitWithError("Probe based profile generation not supported for AutoFDO, "
-      "consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`.");
+    generateProbeBasedProfile();
   } else {
     generateLineNumBasedProfile();
   }
@@ -427,12 +425,80 @@ void ProfileGenerator::generateLineNumBasedProfile() {
   updateTotalSamples();
 }
 
+void ProfileGenerator::generateProbeBasedProfile() {
+  assert(SampleCounters.size() == 1 &&
+         "Must have one entry for profile generation.");
+  // Enable pseudo probe functionalities in SampleProf
+  FunctionSamples::ProfileIsProbeBased = true;
+  const SampleCounter &SC = SampleCounters.begin()->second;
+  // Fill in function body samples
+  populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
+  // Fill in boundary sample counts as well as call site samples for calls
+  populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
+
+  updateTotalSamples();
+}
+
+void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
+    const RangeSample &RangeCounter) {
+  ProbeCounterMap ProbeCounter;
+  // preprocessRangeCounter returns disjoint ranges, so no longer to redo it inside
+  // extractProbesFromRange.
+  extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, false);
+
+  for (const auto &PI : ProbeCounter) {
+    const MCDecodedPseudoProbe *Probe = PI.first;
+    uint64_t Count = PI.second;
+    SampleContextFrameVector FrameVec;
+    Binary->getInlineContextForProbe(Probe, FrameVec, true);
+    FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count);
+    FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
+    if (Probe->isEntry())
+      FunctionProfile.addHeadSamples(Count);
+  }
+}
+
+void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
+    const BranchSample &BranchCounters) {
+  for (const auto &Entry : BranchCounters) {
+    uint64_t SourceOffset = Entry.first.first;
+    uint64_t TargetOffset = Entry.first.second;
+    uint64_t Count = Entry.second;
+    assert(Count != 0 && "Unexpected zero weight branch");
+
+    StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
+    if (CalleeName.size() == 0)
+      continue;
+
+    uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
+    const MCDecodedPseudoProbe *CallProbe =
+        Binary->getCallProbeForAddr(SourceAddress);
+    if (CallProbe == nullptr)
+      continue;
+
+    // Record called target sample and its count.
+    SampleContextFrameVector FrameVec;
+    Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
+
+    if (!FrameVec.empty()) {
+      FunctionSamples &FunctionProfile =
+          getLeafProfileAndAddTotalSamples(FrameVec, 0);
+      FunctionProfile.addCalledTargetSamples(
+          FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
+    }
+  }
+}
+
 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
     const SampleContextFrameVector &FrameVec, uint64_t Count) {
   // Get top level profile
   FunctionSamples *FunctionProfile =
       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
   FunctionProfile->addTotalSamples(Count);
+  if (Binary->usePseudoProbes()) {
+    const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
+    FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
+  }
 
   for (size_t I = 1; I < FrameVec.size(); I++) {
     LineLocation Callsite(
@@ -448,6 +514,10 @@ FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
     }
     FunctionProfile = &Ret.first->second;
     FunctionProfile->addTotalSamples(Count);
+    if (Binary->usePseudoProbes()) {
+      const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
+      FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
+    }
   }
 
   return *FunctionProfile;
@@ -580,8 +650,6 @@ void CSProfileGenerator::generateProfile() {
     computeSizeForProfiledFunctions();
 
   if (Binary->usePseudoProbes()) {
-    // Enable pseudo probe functionalities in SampleProf
-    FunctionSamples::ProfileIsProbeBased = true;
     generateProbeBasedProfile();
   } else {
     generateLineNumBasedProfile();
@@ -804,43 +872,20 @@ void ProfileGeneratorBase::computeSummaryAndThreshold() {
       (Summary->getDetailedSummary()));
 }
 
-// Helper function to extract context prefix string stack
-// Extract context stack for reusing, leaf context stack will
-// be added compressed while looking up function profile
-static void extractPrefixContextStack(
-    SampleContextFrameVector &ContextStack,
-    const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
-    ProfiledBinary *Binary) {
-  for (const auto *P : Probes) {
-    Binary->getInlineContextForProbe(P, ContextStack, true);
-  }
-}
-
-void CSProfileGenerator::generateProbeBasedProfile() {
-  for (const auto &CI : SampleCounters) {
-    const auto *CtxKey = cast<ProbeBasedCtxKey>(CI.first.getPtr());
-    SampleContextFrameVector ContextStack;
-    extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
-    // Fill in function body samples from probes, also infer caller's samples
-    // from callee's probe
-    populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
-    // Fill in boundary samples for a call probe
-    populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
+void ProfileGeneratorBase::extractProbesFromRange(
+    const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
+    bool FindDisjointRanges) {
+  const RangeSample *PRanges = &RangeCounter;
+  RangeSample Ranges;
+  if (FindDisjointRanges) {
+    findDisjointRanges(Ranges, RangeCounter);
+    PRanges = &Ranges;
   }
-}
 
-void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
-                                                ProbeCounterMap &ProbeCounter) {
-  RangeSample Ranges;
-  findDisjointRanges(Ranges, RangeCounter);
-  for (const auto &Range : Ranges) {
+  for (const auto &Range : *PRanges) {
     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
     uint64_t Count = Range.second;
-    // Disjoint ranges have introduce zero-filled gap that
-    // doesn't belong to current context, filter them out.
-    if (Count == 0)
-      continue;
 
     InstructionPointer IP(Binary, RangeBegin, true);
     // Disjoint ranges may have range in the middle of two instr,
@@ -855,8 +900,6 @@ void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
       auto It = Address2ProbesMap.find(IP.Address);
       if (It != Address2ProbesMap.end()) {
         for (const auto &Probe : It->second) {
-          if (!Probe.isBlock())
-            continue;
           ProbeCounter[&Probe] += Count;
         }
       }
@@ -864,6 +907,33 @@ void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
   }
 }
 
+// Helper function to extract context prefix string stack
+// Extract context stack for reusing, leaf context stack will
+// be added compressed while looking up function profile
+static void extractPrefixContextStack(
+    SampleContextFrameVector &ContextStack,
+    const SmallVectorImpl<const MCDecodedPseudoProbe *> &Probes,
+    ProfiledBinary *Binary) {
+  for (const auto *P : Probes) {
+    Binary->getInlineContextForProbe(P, ContextStack, true);
+  }
+}
+
+void CSProfileGenerator::generateProbeBasedProfile() {
+  // Enable pseudo probe functionalities in SampleProf
+  FunctionSamples::ProfileIsProbeBased = true;
+  for (const auto &CI : SampleCounters) {
+    const auto *CtxKey = cast<ProbeBasedCtxKey>(CI.first.getPtr());
+    SampleContextFrameVector ContextStack;
+    extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
+    // Fill in function body samples from probes, also infer caller's samples
+    // from callee's probe
+    populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
+    // Fill in boundary samples for a call probe
+    populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
+  }
+}
+
 void CSProfileGenerator::populateBodySamplesWithProbes(
     const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
   ProbeCounterMap ProbeCounter;
@@ -876,6 +946,10 @@ void CSProfileGenerator::populateBodySamplesWithProbes(
   for (const auto &PI : ProbeCounter) {
     const MCDecodedPseudoProbe *Probe = PI.first;
     uint64_t Count = PI.second;
+    // Disjoint ranges have introduce zero-filled gap that
+    // doesn't belong to current context, filter them out.
+    if (!Probe->isBlock() || Count == 0)
+      continue;
     FunctionSamples &FunctionProfile =
         getFunctionProfileForLeafProbe(ContextStack, Probe);
     // Record the current frame and FunctionProfile whenever samples are

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index af349ac9911ad..996a2716a3d90 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -22,6 +22,9 @@ using namespace sampleprof;
 namespace llvm {
 namespace sampleprof {
 
+using ProbeCounterMap =
+    std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
+
 // This base class for profile generation of sample-based PGO. We reuse all
 // structures relating to function profiles and profile writers as seen in
 // /ProfileData/SampleProf.h.
@@ -77,6 +80,13 @@ class ProfileGeneratorBase {
   */
   void findDisjointRanges(RangeSample &DisjointRanges,
                           const RangeSample &Ranges);
+
+  // Go through each address from range to extract the top frame probe by
+  // looking up in the Address2ProbeMap
+  void extractProbesFromRange(const RangeSample &RangeCounter,
+                              ProbeCounterMap &ProbeCounter,
+                              bool FindDisjointRanges = true);
+
   // Helper function for updating body sample for a leaf location in
   // FunctionProfile
   void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
@@ -118,6 +128,7 @@ class ProfileGenerator : public ProfileGeneratorBase {
 
 private:
   void generateLineNumBasedProfile();
+  void generateProbeBasedProfile();
   RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
   FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName);
   // Helper function to get the leaf frame's FunctionProfile by traversing the
@@ -129,14 +140,14 @@ class ProfileGenerator : public ProfileGeneratorBase {
   void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
   void
   populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
+  void populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
+  void
+  populateBoundarySamplesWithProbesForAllFunctions(const BranchSample &BranchCounters);
   void postProcessProfiles();
   void trimColdProfiles(const SampleProfileMap &Profiles,
                         uint64_t ColdCntThreshold);
 };
 
-using ProbeCounterMap =
-    std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
-
 class CSProfileGenerator : public ProfileGeneratorBase {
 public:
   CSProfileGenerator(ProfiledBinary *Binary,
@@ -281,10 +292,7 @@ class CSProfileGenerator : public ProfileGeneratorBase {
   void populateInferredFunctionSamples();
 
   void generateProbeBasedProfile();
-  // Go through each address from range to extract the top frame probe by
-  // looking up in the Address2ProbeMap
-  void extractProbesFromRange(const RangeSample &RangeCounter,
-                              ProbeCounterMap &ProbeCounter);
+
   // Fill in function body samples from probes
   void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
                                      SampleContextFrames ContextStack);


        


More information about the llvm-commits mailing list