[llvm] a5d3042 - [CSSPGO] Load context profile for external functions in PreLink and populate ThinLTO import list

Wenlei He via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 15 12:22:28 PDT 2021


Author: Wenlei He
Date: 2021-03-15T12:22:15-07:00
New Revision: a5d30421a62cee0217afeac194d111eba9adb15e

URL: https://github.com/llvm/llvm-project/commit/a5d30421a62cee0217afeac194d111eba9adb15e
DIFF: https://github.com/llvm/llvm-project/commit/a5d30421a62cee0217afeac194d111eba9adb15e.diff

LOG: [CSSPGO] Load context profile for external functions in PreLink and populate ThinLTO import list

For ThinLTO's prelink compilation, we need to put external inline candidates into an import list attached to the function's entry count metadata. This enables ThinLink to treat such cross-module callees as hot in the summary index, and later helps postlink import them for profile-guided cross-module inlining.

For AutoFDO, the import list is retrieved by traversing the nested inlinee functions. For CSSPGO, since the profile is flattened, a few things need to happen for it to work:

 - When loading the input profile in extended binary format, we need to load all child context profiles whose parent is in the current module, so that the context trie for the current module includes potential cross-module inlinees.
 - In order to make the above happen, we need to know whether the input profile is a CSSPGO profile before we start reading function profiles, hence a flag is added to the profile summary section.
 - When searching for cross-module inline candidates, we need to walk through the context trie instead of the nested inlinee profiles (callsite samples of an AutoFDO profile).
 - Now that we have more accurate counts with CSSPGO, we switched to using entry count instead of total count to decide if an external callee is potentially beneficial to inline. This makes it consistent with how we determine whether a call target is a potential inline candidate.

Differential Revision: https://reviews.llvm.org/D98590

Added: 
    llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.prof
    llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.prof.extbin
    llvm/test/Transforms/SampleProfile/csspgo-import-list.ll

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
    llvm/lib/ProfileData/SampleProfReader.cpp
    llvm/lib/ProfileData/SampleProfWriter.cpp
    llvm/lib/Transforms/IPO/SampleProfile.cpp
    llvm/tools/llvm-profgen/ProfileGenerator.cpp
    llvm/tools/llvm-profgen/ProfileGenerator.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 18a0989ae537..70fdaff38504 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -187,7 +187,10 @@ enum class SecProfSummaryFlags : uint32_t {
   /// SecFlagPartial means the profile is for common/shared code.
   /// The common profile is usually merged from profiles collected
   /// from running other targets.
-  SecFlagPartial = (1 << 0)
+  SecFlagPartial = (1 << 0),
+  /// SecFlagFullContext means this is a context-sensitive profile for
+  /// CSSPGO.
+  SecFlagFullContext = (1 << 1)
 };
 
 enum class SecFuncMetadataFlags : uint32_t {
@@ -730,7 +733,7 @@ class FunctionSamples {
   /// corresponding function is no less than \p Threshold, add its corresponding
   /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
   /// to \p S.
-  void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
+  void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S,
                             const StringMap<Function *> &SymbolMap,
                             uint64_t Threshold) const {
     if (TotalSamples <= Threshold)
@@ -753,7 +756,7 @@ class FunctionSamples {
         }
     for (const auto &CS : CallsiteSamples)
       for (const auto &NameFS : CS.second)
-        NameFS.second.findInlinedFunctions(S, M, SymbolMap, Threshold);
+        NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold);
   }
 
   /// Set the name of the function.

diff  --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
index 422651d0dd23..bc8f602795a9 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
@@ -115,6 +115,8 @@ class SampleContextTracker {
                                      bool MergeContext = true);
   // Query base profile for a given function by name.
   FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext);
+  // Retrieve the context trie node for given profile context
+  ContextTrieNode *getContextFor(const SampleContext &Context);
   // Mark a context profile as inlined when function is inlined.
   // This makes sure that inlined context profile will be excluded in
   // function's base profile.
@@ -127,7 +129,6 @@ class SampleContextTracker {
 
 private:
   ContextTrieNode *getContextFor(const DILocation *DIL);
-  ContextTrieNode *getContextFor(const SampleContext &Context);
   ContextTrieNode *getCalleeContextFor(const DILocation *DIL,
                                        StringRef CalleeName);
   ContextTrieNode *getOrCreateContextPath(const SampleContext &Context,

diff  --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 089ad7bcac0d..697d29f6f412 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -38,6 +38,7 @@
 #include <cstdint>
 #include <limits>
 #include <memory>
+#include <set>
 #include <system_error>
 #include <vector>
 
@@ -577,6 +578,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
       return EC;
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
       Summary->setPartialProfile(true);
+    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
+      FunctionSamples::ProfileIsCS = ProfileIsCS = true;
     break;
   case SecNameTable: {
     FixedLengthMD5 =
@@ -687,6 +690,46 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
           return EC;
       }
+    } else if (FunctionSamples::ProfileIsCS) {
+      // Compute the ordered set of names, so we can
+      // get all context profiles under a subtree by
+      // iterating through the ordered names.
+      struct Comparer {
+        // Ignore the closing ']' when ordering context
+        bool operator()(const StringRef &L, const StringRef &R) const {
+          return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1);
+        }
+      };
+      std::set<StringRef, Comparer> OrderedNames;
+      for (auto Name : FuncOffsetTable) {
+        OrderedNames.insert(Name.first);
+      }
+
+      // For each function in current module, load all
+      // context profiles for the function.
+      for (auto NameOffset : FuncOffsetTable) {
+        StringRef ContextName = NameOffset.first;
+        SampleContext FContext(ContextName);
+        auto FuncName = FContext.getNameWithoutContext();
+        if (!FuncsToUse.count(FuncName) &&
+            (!Remapper || !Remapper->exist(FuncName)))
+          continue;
+
+        // For each context profile we need, try to load
+        // all context profiles in its subtree. This can
+        // help profile-guided importing for ThinLTO.
+        auto It = OrderedNames.find(ContextName);
+        while (It != OrderedNames.end() &&
+               It->startswith(ContextName.substr(0, ContextName.size() - 1))) {
+          const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It];
+          assert(FuncProfileAddr < End && "out of LBRProfile section");
+          if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+            return EC;
+          // Remove loaded context profile so we won't
+          // load it repeatedly.
+          It = OrderedNames.erase(It);
+        }
+      }
     } else {
       for (auto NameOffset : FuncOffsetTable) {
         SampleContext FContext(NameOffset.first);
@@ -704,8 +747,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
   }
   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
-  ProfileIsCS = (CSProfileCount > 0);
-  FunctionSamples::ProfileIsCS = ProfileIsCS;
+  assert(ProfileIsCS == (CSProfileCount > 0) &&
+         "Section flag should be consistent with actual profile");
   return sampleprof_error::success;
 }
 
@@ -1034,6 +1077,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
   case SecProfSummary:
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
       Flags.append("partial,");
+    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
+      Flags.append("context,");
     break;
   default:
     break;

diff  --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 8939d3a84596..7a00c3fec7c7 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -237,6 +237,8 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
     setToCompressSection(SecProfileSymbolList);
   if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased)
     addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased);
+  if (Type == SecProfSummary && FunctionSamples::ProfileIsCS)
+    addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext);
 
   uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
   switch (Type) {

diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 5e53e6394776..2ecff87f492f 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -365,6 +365,10 @@ class SampleProfileLoader final
   findFunctionSamples(const Instruction &I) const override;
   std::vector<const FunctionSamples *>
   findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
+  void findExternalInlineCandidate(const FunctionSamples *Samples,
+                                   DenseSet<GlobalValue::GUID> &InlinedGUIDs,
+                                   const StringMap<Function *> &SymbolMap,
+                                   uint64_t Threshold);
   // Attempt to promote indirect call and also inline the promoted call
   bool tryPromoteAndInlineCandidate(
       Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
@@ -922,6 +926,60 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
   }
 }
 
+void SampleProfileLoader::findExternalInlineCandidate(
+    const FunctionSamples *Samples, DenseSet<GlobalValue::GUID> &InlinedGUIDs,
+    const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
+  assert(Samples && "expect non-null caller profile");
+
+  // For AutoFDO profile, retrieve candidate profiles by walking over
+  // the nested inlinee profiles.
+  if (!ProfileIsCS) {
+    Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
+    return;
+  }
+
+  ContextTrieNode *Caller =
+      ContextTracker->getContextFor(Samples->getContext());
+  std::queue<ContextTrieNode *> CalleeList;
+  CalleeList.push(Caller);
+  while (!CalleeList.empty()) {
+    ContextTrieNode *Node = CalleeList.front();
+    CalleeList.pop();
+    FunctionSamples *CalleeSample = Node->getFunctionSamples();
+    // For CSSPGO profile, retrieve candidate profiles by walking over the
+    // trie built for context profiles. Note that we also take call targets
+    // even if the callee doesn't have a corresponding context profile.
+    if (!CalleeSample || CalleeSample->getEntrySamples() < Threshold)
+      continue;
+
+    StringRef Name = CalleeSample->getFuncName();
+    Function *Func = SymbolMap.lookup(Name);
+    // Add to the import list only when it's defined outside this module.
+    if (!Func || Func->isDeclaration())
+      InlinedGUIDs.insert(FunctionSamples::getGUID(Name));
+
+    // Import hot CallTargets, which may not be available in IR because full
+    // profile annotation cannot be done until backend compilation in ThinLTO.
+    for (const auto &BS : CalleeSample->getBodySamples())
+      for (const auto &TS : BS.second.getCallTargets())
+        if (TS.getValue() > Threshold) {
+          StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
+          const Function *Callee = SymbolMap.lookup(CalleeName);
+          if (!Callee || Callee->isDeclaration())
+            InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeName));
+        }
+
+    // Import hot child context profiles associated with callees. Note that
+    // this may overlap with the call target loop above, but walking the child
+    // context profiles as well effectively allows us to use the max of entry
+    // count and call target count to determine importing.
+    for (auto &Child : Node->getAllChildContext()) {
+      ContextTrieNode *CalleeNode = &Child.second;
+      CalleeList.push(CalleeNode);
+    }
+  }
+}
+
 /// Iteratively inline hot callsites of a function.
 ///
 /// Iteratively traverse all callsites of the function \p F, and find if
@@ -994,8 +1052,8 @@ bool SampleProfileLoader::inlineHotFunctions(
         for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
           uint64_t SumOrigin = Sum;
           if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
-            FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), SymbolMap,
-                                     PSI->getOrCompHotCountThreshold());
+            findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
+                                        PSI->getOrCompHotCountThreshold());
             continue;
           }
           if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
@@ -1014,9 +1072,9 @@ bool SampleProfileLoader::inlineHotFunctions(
           LocalChanged = true;
         }
       } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
-        findCalleeFunctionSamples(*I)->findInlinedFunctions(
-            InlinedGUIDs, F.getParent(), SymbolMap,
-            PSI->getOrCompHotCountThreshold());
+        findExternalInlineCandidate(findCalleeFunctionSamples(*I), InlinedGUIDs,
+                                    SymbolMap,
+                                    PSI->getOrCompHotCountThreshold());
       }
     }
     Changed |= LocalChanged;
@@ -1268,8 +1326,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
       for (const auto *FS : CalleeSamples) {
         // TODO: Consider disable pre-lTO ICP for MonoLTO as well
         if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
-          FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), SymbolMap,
-                                   PSI->getOrCompHotCountThreshold());
+          findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
+                                      PSI->getOrCompHotCountThreshold());
           continue;
         }
         uint64_t EntryCountDistributed =
@@ -1314,9 +1372,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
         Changed = true;
       }
     } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
-      findCalleeFunctionSamples(*I)->findInlinedFunctions(
-          InlinedGUIDs, F.getParent(), SymbolMap,
-          PSI->getOrCompHotCountThreshold());
+      findExternalInlineCandidate(Candidate.CalleeSamples, InlinedGUIDs,
+                                  SymbolMap, PSI->getOrCompHotCountThreshold());
     }
   }
 

diff  --git a/llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.prof b/llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.prof
new file mode 100644
index 000000000000..c81f8a998b33
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.prof
@@ -0,0 +1,27 @@
+[main]:154:2
+ 2: 12
+ 3: 18 _Z5funcAi:11
+ 3.1: 18 _Z5funcBi:19
+[main:3.1 @ _Z5funcBi]:120:7040
+ 0: 7001
+ 1: 19 _Z8funcLeafi:9999
+ 3: 12
+[main:3.1 @ _Z5funcBi @ _Z5funcBiLeaf2]:1:9010
+ 0: 7001
+ 1: 19 _Z8funcLeafi3:9999
+ 3: 12
+[main:2 @ _Z5funcAi]:99:11
+ 0: 10
+ 1: 10 _Z8funcLeafi:11
+ 2: 287864 _Z3fibi:315608
+ 3: 24
+[main:3 @ _Z5funcCi]:23254:11
+ 0: 10
+ 1: 23250
+[main:3 @ _Z5funcDi]:23:45201
+ 0: 10
+ 1: 23250
+[main:2 @ _Z5funcAi:2 @ _Z3fibi]:120:101
+ 0: 99
+ 1: 6
+ 3: 97

diff  --git a/llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.prof.extbin b/llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.prof.extbin
new file mode 100644
index 000000000000..f3bdfafc6716
Binary files /dev/null and b/llvm/test/Transforms/SampleProfile/Inputs/csspgo-import-list.prof.extbin 
diff er

diff  --git a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
new file mode 100644
index 000000000000..c4b68d5ed7c1
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
@@ -0,0 +1,78 @@
+; Make sure Import GUID list for ThinLTO properly set for CSSPGO
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -S | FileCheck %s
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof.extbin -S | FileCheck %s
+
+declare i32 @_Z5funcBi(i32 %x)
+declare i32 @_Z5funcAi(i32 %x)
+
+define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
+entry:
+  br label %for.body, !dbg !25
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 %add3, !dbg !27
+
+for.body:                                         ; preds = %for.body, %entry
+  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
+  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
+  %add = add nuw nsw i32 %x.011, 1, !dbg !31
+  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
+  %add2 = add i32 %call, %r.010, !dbg !34
+  %add3 = add i32 %add2, %call1, !dbg !35
+  %dec = add nsw i32 %x.011, -1, !dbg !36
+  %cmp = icmp eq i32 %x.011, 0, !dbg !38
+  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
+}
+
+; Make sure the ImportGUID stays with entry count metadata for ThinLTO-PreLink
+; CHECK: distinct !DISubprogram(name: "main"
+; CHECK: !{!"function_entry_count", i64 3, i64 446061515086924981, i64 3815895320998406042, i64 7102633082150537521, i64 -2862076748587597320}
+
+
+attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!14, !15, !16}
+!llvm.ident = !{!17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
+!4 = !{}
+!5 = !{!6, !10, !11}
+!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9}
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!12 = !{!0}
+!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
+!14 = !{i32 7, !"Dwarf Version", i32 4}
+!15 = !{i32 2, !"Debug Info Version", i32 3}
+!16 = !{i32 1, !"wchar_size", i32 4}
+!17 = !{!"clang version 11.0.0"}
+!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!9}
+!21 = !{!22, !23}
+!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
+!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
+!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
+!25 = !DILocation(line: 13, column: 3, scope: !26)
+!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
+!27 = !DILocation(line: 17, column: 3, scope: !18)
+!28 = !DILocation(line: 13, column: 10, scope: !29)
+!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
+!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
+!31 = !DILocation(line: 14, column: 29, scope: !29)
+!32 = !DILocation(line: 14, column: 21, scope: !33)
+!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
+!34 = !DILocation(line: 14, column: 19, scope: !29)
+!35 = !DILocation(line: 14, column: 7, scope: !29)
+!36 = !DILocation(line: 13, column: 33, scope: !37)
+!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
+!38 = !DILocation(line: 13, column: 26, scope: !39)
+!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index d4233a804e24..77416d2ff989 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -198,6 +198,33 @@ CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr) {
   return Ret.first->second;
 }
 
+void CSProfileGenerator::generateProfile() {
+  FunctionSamples::ProfileIsCS = true;
+  for (const auto &BI : BinarySampleCounters) {
+    ProfiledBinary *Binary = BI.first;
+    for (const auto &CI : BI.second) {
+      const StringBasedCtxKey *CtxKey =
+          dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
+      StringRef ContextId(CtxKey->Context);
+      // Get or create the function profile for this context
+      FunctionSamples &FunctionProfile =
+          getFunctionProfileForContext(ContextId);
+
+      // Fill in function body samples from the range counter
+      populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
+                                  Binary);
+      // Fill in boundary sample counts as well as call site samples for calls
+      populateFunctionBoundarySamples(ContextId, FunctionProfile,
+                                      CI.second.BranchCounter, Binary);
+    }
+  }
+  // Fill in call site value samples for inlined calls and also use context to
+  // infer missing samples. Since we don't have call counts for inlined
+  // functions, we estimate them from the inlinee's profile using the entry of
+  // the body sample.
+  populateInferredFunctionSamples();
+}
+
 void CSProfileGenerator::updateBodySamplesforFunctionProfile(
     FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc,
     uint64_t Count) {
@@ -422,6 +449,7 @@ extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
 void PseudoProbeCSProfileGenerator::generateProfile() {
   // Enable pseudo probe functionalities in SampleProf
   FunctionSamples::ProfileIsProbeBased = true;
+  FunctionSamples::ProfileIsCS = true;
   for (const auto &BI : BinarySampleCounters) {
     ProfiledBinary *Binary = BI.first;
     for (const auto &CI : BI.second) {

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index ff014ed79ce1..4ea459e7dabb 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -65,31 +65,7 @@ class CSProfileGenerator : public ProfileGenerator {
       : BinarySampleCounters(Counters){};
 
 public:
-  void generateProfile() override {
-    for (const auto &BI : BinarySampleCounters) {
-      ProfiledBinary *Binary = BI.first;
-      for (const auto &CI : BI.second) {
-        const StringBasedCtxKey *CtxKey =
-            dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
-        StringRef ContextId(CtxKey->Context);
-        // Get or create function profile for the range
-        FunctionSamples &FunctionProfile =
-            getFunctionProfileForContext(ContextId);
-
-        // Fill in function body samples
-        populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
-                                    Binary);
-        // Fill in boundary sample counts as well as call site samples for calls
-        populateFunctionBoundarySamples(ContextId, FunctionProfile,
-                                        CI.second.BranchCounter, Binary);
-      }
-    }
-    // Fill in call site value sample for inlined calls and also use context to
-    // infer missing samples. Since we don't have call count for inlined
-    // functions, we estimate it from inlinee's profile using the entry of the
-    // body sample.
-    populateInferredFunctionSamples();
-  }
+  void generateProfile() override;
 
   // Remove adjacent repeated context sequences up to a given sequence length,
   // -1 means no size limit. Note that repeated sequences are identified based


        


More information about the llvm-commits mailing list