[llvm] r332058 - [SampleFDO] Don't treat warm callsite with inline instance in the profile as cold

Wei Mi via llvm-commits llvm-commits at lists.llvm.org
Thu May 10 16:02:27 PDT 2018


Author: wmi
Date: Thu May 10 16:02:27 2018
New Revision: 332058

URL: http://llvm.org/viewvc/llvm-project?rev=332058&view=rev
Log:
[SampleFDO] Don't treat warm callsite with inline instance in the profile as cold

We found that the current SampleFDO has a performance issue while triaging a
regression. For a callsite with an inline instance in the profile, even if the
hot callsite inliner cannot inline it, it may still execute enough times and
should not be treated as cold by the regular inliner later. However, currently
if such a callsite is not inlined by the hot callsite inliner, and the BB where
the callsite is located doesn't get samples from other instructions inside it,
the callsite will have no profile metadata annotated. In the regular inliner's
cost analysis, if the callsite has no profile annotated and its caller has
profile information, it will be treated as cold.

The fix changes the callsiteIsHot check to compare CallsiteTotalSamples with
the hot cutoff value computed by ProfileSummaryInfo.

Differential Revision: https://reviews.llvm.org/D45377

Added:
    llvm/trunk/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof
    llvm/trunk/test/Transforms/SampleProfile/warm-inline-instance.ll
Modified:
    llvm/trunk/include/llvm/Analysis/ProfileSummaryInfo.h
    llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp
    llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
    llvm/trunk/test/Transforms/SampleProfile/function_metadata.ll
    llvm/trunk/test/Transforms/SampleProfile/inline.ll

Modified: llvm/trunk/include/llvm/Analysis/ProfileSummaryInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ProfileSummaryInfo.h?rev=332058&r1=332057&r2=332058&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/ProfileSummaryInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/ProfileSummaryInfo.h Thu May 10 16:02:27 2018
@@ -110,6 +110,12 @@ public:
   bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
   /// Returns true if Callsite \p CS is considered cold.
   bool isColdCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
+  /// Returns HotCountThreshold if set. Recompute HotCountThreshold
+  /// if not set.
+  uint64_t getOrCompHotCountThreshold();
+  /// Returns ColdCountThreshold if set. Recompute ColdCountThreshold
+  /// if not set.
+  uint64_t getOrCompColdCountThreshold();
   /// Returns HotCountThreshold if set.
   uint64_t getHotCountThreshold() {
     return HotCountThreshold ? HotCountThreshold.getValue() : 0;

Modified: llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp?rev=332058&r1=332057&r2=332058&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp Thu May 10 16:02:27 2018
@@ -223,6 +223,18 @@ bool ProfileSummaryInfo::isColdCount(uin
   return ColdCountThreshold && C <= ColdCountThreshold.getValue();
 }
 
+uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() {
+  if (!HotCountThreshold) // Not set yet: recompute before reading it.
+    computeThresholds();
+  return HotCountThreshold ? HotCountThreshold.getValue() : 0; // 0 if unset.
+}
+
+uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() {
+  if (!ColdCountThreshold) // Not set yet: recompute before reading it.
+    computeThresholds();
+  return ColdCountThreshold ? ColdCountThreshold.getValue() : 0; // 0: unset.
+}
+
 bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) {
   auto Count = BFI->getBlockProfileCount(B);
   return Count && isHotCount(*Count);

Modified: llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp?rev=332058&r1=332057&r2=332058&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp Thu May 10 16:02:27 2018
@@ -37,6 +37,7 @@
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -109,11 +110,6 @@ static cl::opt<unsigned> SampleProfileSa
     cl::desc("Emit a warning if less than N% of samples in the input profile "
              "are matched to the IR."));
 
-static cl::opt<double> SampleProfileHotThreshold(
-    "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
-    cl::desc("Inlined functions that account for more than N% of all samples "
-             "collected in the parent function, will be inlined again."));
-
 namespace {
 
 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -130,10 +126,13 @@ public:
   bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
                        uint32_t Discriminator, uint64_t Samples);
   unsigned computeCoverage(unsigned Used, unsigned Total) const;
-  unsigned countUsedRecords(const FunctionSamples *FS) const;
-  unsigned countBodyRecords(const FunctionSamples *FS) const;
+  unsigned countUsedRecords(const FunctionSamples *FS,
+                            ProfileSummaryInfo *PSI) const;
+  unsigned countBodyRecords(const FunctionSamples *FS,
+                            ProfileSummaryInfo *PSI) const;
   uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
-  uint64_t countBodySamples(const FunctionSamples *FS) const;
+  uint64_t countBodySamples(const FunctionSamples *FS,
+                            ProfileSummaryInfo *PSI) const;
 
   void clear() {
     SampleCoverage.clear();
@@ -186,7 +185,8 @@ public:
         IsThinLTOPreLink(IsThinLTOPreLink) {}
 
   bool doInitialization(Module &M);
-  bool runOnModule(Module &M, ModuleAnalysisManager *AM);
+  bool runOnModule(Module &M, ModuleAnalysisManager *AM,
+                   ProfileSummaryInfo *_PSI);
 
   void dump() { Reader->dump(); }
 
@@ -285,6 +285,9 @@ protected:
   /// Instead, we will mark GUIDs that needs to be annotated to the function.
   bool IsThinLTOPreLink;
 
+  /// Profile Summary Info computed from sample profile.
+  ProfileSummaryInfo *PSI = nullptr;
+
   /// Total number of samples collected in this profile.
   ///
   /// This is the sum of all the samples collected in all the functions executed
@@ -325,6 +328,7 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
   }
 
 private:
@@ -335,7 +339,7 @@ private:
 
 } // end anonymous namespace
 
-/// Return true if the given callsite is hot wrt to its caller.
+/// Return true if the given callsite is hot wrt to hot cutoff threshold.
 ///
 /// Functions that were inlined in the original binary will be represented
 /// in the inline stack in the sample profile. If the profile shows that
@@ -343,28 +347,17 @@ private:
 /// frequently), then we will recreate the inline decision and apply the
 /// profile from the inlined callsite.
 ///
-/// To decide whether an inlined callsite is hot, we compute the fraction
-/// of samples used by the callsite with respect to the total number of samples
-/// collected in the caller.
-///
-/// If that fraction is larger than the default given by
-/// SampleProfileHotThreshold, the callsite will be inlined again.
-static bool callsiteIsHot(const FunctionSamples *CallerFS,
-                          const FunctionSamples *CallsiteFS) {
+/// To decide whether an inlined callsite is hot, we compare the callsite
+/// sample count with the hot cutoff computed by ProfileSummaryInfo; it is
+/// regarded as hot if the count is above the cutoff value.
+static bool callsiteIsHot(const FunctionSamples *CallsiteFS,
+                          ProfileSummaryInfo *PSI) {
   if (!CallsiteFS)
     return false; // The callsite was not inlined in the original binary.
 
-  uint64_t ParentTotalSamples = CallerFS->getTotalSamples();
-  if (ParentTotalSamples == 0)
-    return false; // Avoid division by zero.
-
+  assert(PSI && "PSI is expected to be non null");
   uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
-  if (CallsiteTotalSamples == 0)
-    return false; // Callsite is trivially cold.
-
-  double PercentSamples =
-      (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
-  return PercentSamples >= SampleProfileHotThreshold;
+  return PSI->isHotCount(CallsiteTotalSamples);
 }
 
 /// Mark as used the sample record for the given function samples at
@@ -387,7 +380,8 @@ bool SampleCoverageTracker::markSamplesU
 ///
 /// This count does not include records from cold inlined callsites.
 unsigned
-SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
+SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
+                                        ProfileSummaryInfo *PSI) const {
   auto I = SampleCoverage.find(FS);
 
   // The size of the coverage map for FS represents the number of records
@@ -400,8 +394,8 @@ SampleCoverageTracker::countUsedRecords(
   for (const auto &I : FS->getCallsiteSamples())
     for (const auto &J : I.second) {
       const FunctionSamples *CalleeSamples = &J.second;
-      if (callsiteIsHot(FS, CalleeSamples))
-        Count += countUsedRecords(CalleeSamples);
+      if (callsiteIsHot(CalleeSamples, PSI))
+        Count += countUsedRecords(CalleeSamples, PSI);
     }
 
   return Count;
@@ -411,15 +405,16 @@ SampleCoverageTracker::countUsedRecords(
 ///
 /// This count does not include records from cold inlined callsites.
 unsigned
-SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
+SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
+                                        ProfileSummaryInfo *PSI) const {
   unsigned Count = FS->getBodySamples().size();
 
   // Only count records in hot callsites.
   for (const auto &I : FS->getCallsiteSamples())
     for (const auto &J : I.second) {
       const FunctionSamples *CalleeSamples = &J.second;
-      if (callsiteIsHot(FS, CalleeSamples))
-        Count += countBodyRecords(CalleeSamples);
+      if (callsiteIsHot(CalleeSamples, PSI))
+        Count += countBodyRecords(CalleeSamples, PSI);
     }
 
   return Count;
@@ -429,7 +424,8 @@ SampleCoverageTracker::countBodyRecords(
 ///
 /// This count does not include samples from cold inlined callsites.
 uint64_t
-SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
+SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
+                                        ProfileSummaryInfo *PSI) const {
   uint64_t Total = 0;
   for (const auto &I : FS->getBodySamples())
     Total += I.second.getSamples();
@@ -438,8 +434,8 @@ SampleCoverageTracker::countBodySamples(
   for (const auto &I : FS->getCallsiteSamples())
     for (const auto &J : I.second) {
       const FunctionSamples *CalleeSamples = &J.second;
-      if (callsiteIsHot(FS, CalleeSamples))
-        Total += countBodySamples(CalleeSamples);
+      if (callsiteIsHot(CalleeSamples, PSI))
+        Total += countBodySamples(CalleeSamples, PSI);
     }
 
   return Total;
@@ -767,7 +763,7 @@ bool SampleProfileLoader::inlineHotFunct
         if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
             !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
           Candidates.push_back(&I);
-          if (callsiteIsHot(Samples, FS))
+          if (callsiteIsHot(FS, PSI))
             Hot = true;
         }
       }
@@ -787,8 +783,7 @@ bool SampleProfileLoader::inlineHotFunct
         for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
           if (IsThinLTOPreLink) {
             FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
-                                     Samples->getTotalSamples() *
-                                         SampleProfileHotThreshold / 100);
+                                     PSI->getOrCompHotCountThreshold());
             continue;
           }
           auto CalleeFunctionName = FS->getName();
@@ -827,8 +822,7 @@ bool SampleProfileLoader::inlineHotFunct
           LocalChanged = true;
       } else if (IsThinLTOPreLink) {
         findCalleeFunctionSamples(*I)->findInlinedFunctions(
-            InlinedGUIDs, F.getParent(),
-            Samples->getTotalSamples() * SampleProfileHotThreshold / 100);
+            InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
       }
     }
     if (LocalChanged) {
@@ -1463,8 +1457,8 @@ bool SampleProfileLoader::emitAnnotation
 
   // If coverage checking was requested, compute it now.
   if (SampleProfileRecordCoverage) {
-    unsigned Used = CoverageTracker.countUsedRecords(Samples);
-    unsigned Total = CoverageTracker.countBodyRecords(Samples);
+    unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
+    unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
     unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
     if (Coverage < SampleProfileRecordCoverage) {
       F.getContext().diagnose(DiagnosticInfoSampleProfile(
@@ -1477,7 +1471,7 @@ bool SampleProfileLoader::emitAnnotation
 
   if (SampleProfileSampleCoverage) {
     uint64_t Used = CoverageTracker.getTotalUsedSamples();
-    uint64_t Total = CoverageTracker.countBodySamples(Samples);
+    uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
     unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
     if (Coverage < SampleProfileSampleCoverage) {
       F.getContext().diagnose(DiagnosticInfoSampleProfile(
@@ -1496,6 +1490,7 @@ INITIALIZE_PASS_BEGIN(SampleProfileLoade
                       "Sample Profile loader", false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
                     "Sample Profile loader", false, false)
 
@@ -1520,10 +1515,15 @@ ModulePass *llvm::createSampleProfileLoa
   return new SampleProfileLoaderLegacyPass(Name);
 }
 
-bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) {
+bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
+                                      ProfileSummaryInfo *_PSI) {
   if (!ProfileIsValid)
     return false;
 
+  PSI = _PSI;
+  if (M.getProfileSummary() == nullptr)
+    M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
+
   // Compute the total number of samples collected in this profile.
   for (const auto &I : Reader->getProfiles())
     TotalCollectedSamples += I.second.getTotalSamples();
@@ -1554,15 +1554,15 @@ bool SampleProfileLoader::runOnModule(Mo
       clearFunctionData();
       retval |= runOnFunction(F, AM);
     }
-  if (M.getProfileSummary() == nullptr)
-    M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
   return retval;
 }
 
 bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
   ACT = &getAnalysis<AssumptionCacheTracker>();
   TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
-  return SampleLoader.runOnModule(M, nullptr);
+  ProfileSummaryInfo *PSI =
+      getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+  return SampleLoader.runOnModule(M, nullptr, PSI);
 }
 
 bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
@@ -1604,7 +1604,8 @@ PreservedAnalyses SampleProfileLoaderPas
 
   SampleLoader.doInitialization(M);
 
-  if (!SampleLoader.runOnModule(M, &AM))
+  ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+  if (!SampleLoader.runOnModule(M, &AM, PSI))
     return PreservedAnalyses::all();
 
   return PreservedAnalyses::none();

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof?rev=332058&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof Thu May 10 16:02:27 2018
@@ -0,0 +1,11 @@
+main:2257150:0
+ 2.1: 5553
+ 3: 5391
+ 3.1: foo:5860
+  0: 5279
+  1: 5279
+  2: 5279
+ 4.1: goo:60
+  0: 20
+  1: 20
+  2: 20

Modified: llvm/trunk/test/Transforms/SampleProfile/function_metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/function_metadata.ll?rev=332058&r1=332057&r2=332058&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/function_metadata.ll (original)
+++ llvm/trunk/test/Transforms/SampleProfile/function_metadata.ll Thu May 10 16:02:27 2018
@@ -28,7 +28,7 @@ define void @test_liveness() !dbg !12 {
 
 ; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to
 ; make sure hot inline stacks are imported.
-; CHECK: ![[ENTRY_TEST]] = !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905,  i64 -7908226060800700466, i64 -2012135647395072713}
+; CHECK: ![[ENTRY_TEST]] = !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7546896869197086323, i64 7682762345278052905,  i64 -7908226060800700466, i64 -2012135647395072713}
 
 ; Check GUIDs for both foo and foo_available are included in the metadata to
 ; make sure the liveness analysis can capture the dependency from test_liveness

Modified: llvm/trunk/test/Transforms/SampleProfile/inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/inline.ll?rev=332058&r1=332057&r2=332058&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/inline.ll (original)
+++ llvm/trunk/test/Transforms/SampleProfile/inline.ll Thu May 10 16:02:27 2018
@@ -1,5 +1,5 @@
-; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -sample-profile-inline-hot-threshold=1 -S | FileCheck %s
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -sample-profile-inline-hot-threshold=1 -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s
 
 ; Original C++ test case
 ;

Added: llvm/trunk/test/Transforms/SampleProfile/warm-inline-instance.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/warm-inline-instance.ll?rev=332058&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/warm-inline-instance.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/warm-inline-instance.ll Thu May 10 16:02:27 2018
@@ -0,0 +1,115 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/warm-inline-instance.prof -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/warm-inline-instance.prof -S | FileCheck %s
+
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @foo(i32 %x, i32 %y) !dbg !4 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %t0 = load i32, i32* %x.addr, align 4, !dbg !11
+  %t1 = load i32, i32* %y.addr, align 4, !dbg !11
+  %add = add nsw i32 %t0, %t1, !dbg !11
+  ret i32 %add, !dbg !11
+}
+
+define i32 @goo(i32 %x, i32 %y) {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %t0 = load i32, i32* %x.addr, align 4, !dbg !11
+  %t1 = load i32, i32* %y.addr, align 4, !dbg !11
+  %add = add nsw i32 %t0, %t1, !dbg !11
+  ret i32 %add, !dbg !11
+}
+
+; Function Attrs: uwtable
+define i32 @main() !dbg !7 {
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4, !dbg !12
+  br label %while.cond, !dbg !13
+
+while.cond:                                       ; preds = %if.end, %entry
+  %t0 = load i32, i32* %i, align 4, !dbg !14
+  %inc = add nsw i32 %t0, 1, !dbg !14
+  store i32 %inc, i32* %i, align 4, !dbg !14
+  %cmp = icmp slt i32 %t0, 400000000, !dbg !14
+  br i1 %cmp, label %while.body, label %while.end, !dbg !14
+
+while.body:                                       ; preds = %while.cond
+  %t1 = load i32, i32* %i, align 4, !dbg !16
+  %cmp1 = icmp ne i32 %t1, 100, !dbg !16
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+
+if.then:                                          ; preds = %while.body
+  %t2 = load i32, i32* %i, align 4, !dbg !18
+  %t3 = load i32, i32* %s, align 4, !dbg !18
+; Although the ratio of total samples of @foo vs total samples of @main is
+; small, since the total samples count is larger than hot cutoff computed by
+; ProfileSummaryInfo, we will still regard the callsite of foo as hot and
+; early inlining will inline it.
+; CHECK-LABEL: @main(
+; CHECK-NOT: call i32 @foo(i32 %t2, i32 %t3)
+  %call1 = call i32 @foo(i32 %t2, i32 %t3), !dbg !18
+  store i32 %call1, i32* %s, align 4, !dbg !18
+  br label %if.end, !dbg !18
+
+if.else:                                          ; preds = %while.body
+; The basic block of the call to @goo doesn't get any samples, so no profile will be annotated.
+; CHECK: call i32 @goo(i32 2, i32 3), !dbg !{{[0-9]+}}
+; CHECK-NOT: !prof
+; CHECK-SAME: {{$}}
+  %call2 = call i32 @goo(i32 2, i32 3), !dbg !26
+  store i32 %call2, i32* %s, align 4, !dbg !20
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %while.cond, !dbg !22
+
+while.end:                                        ; preds = %while.cond
+  %t4 = load i32, i32* %s, align 4, !dbg !24
+  %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %t4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+declare i32 @printf(i8*, ...) #2
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "calls.cc", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = !DILocation(line: 4, scope: !4)
+!12 = !DILocation(line: 8, scope: !7)
+!13 = !DILocation(line: 9, scope: !7)
+!14 = !DILocation(line: 9, scope: !15)
+!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7)
+!16 = !DILocation(line: 10, scope: !17)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !19)
+!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17)
+!20 = !DILocation(line: 10, scope: !21)
+!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17)
+!22 = !DILocation(line: 10, scope: !23)
+!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17)
+!24 = !DILocation(line: 11, scope: !7)
+!25 = !DILocation(line: 12, scope: !7)
+!26 = !DILocation(line: 11, scope: !19)




More information about the llvm-commits mailing list