[llvm] r300240 - SamplePGO: convert callsite samples map key from callsite_location to callsite_location+callee_name

Dehao Chen via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 13 12:52:11 PDT 2017


Author: dehao
Date: Thu Apr 13 14:52:10 2017
New Revision: 300240

URL: http://llvm.org/viewvc/llvm-project?rev=300240&view=rev
Log:
SamplePGO: convert callsite samples map key from callsite_location to callsite_location+callee_name

Summary: For iterative SamplePGO, an indirect call can be speculatively promoted to multiple direct calls and get inlined. All these promoted direct calls will share the same callsite location (offset+discriminator). With the current implementation, we cannot distinguish between different promotion candidates and its inlined instance. This patch adds callee_name to the key of the callsite sample map. And added helper functions to get all inlined callee samples for a given callsite location. This helps the profile annotator promote correct targets and inline it before annotation, and ensures all indirect call targets to be annotated correctly.

Reviewers: davidxl, dnovillo

Reviewed By: davidxl

Subscribers: andreadb, llvm-commits

Differential Revision: https://reviews.llvm.org/D31950

Modified:
    llvm/trunk/include/llvm/ProfileData/SampleProf.h
    llvm/trunk/lib/ProfileData/SampleProf.cpp
    llvm/trunk/lib/ProfileData/SampleProfReader.cpp
    llvm/trunk/lib/ProfileData/SampleProfWriter.cpp
    llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
    llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.prof
    llvm/trunk/test/Transforms/SampleProfile/indirect-call.ll

Modified: llvm/trunk/include/llvm/ProfileData/SampleProf.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ProfileData/SampleProf.h?rev=300240&r1=300239&r2=300240&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ProfileData/SampleProf.h (original)
+++ llvm/trunk/include/llvm/ProfileData/SampleProf.h Thu Apr 13 14:52:10 2017
@@ -184,7 +184,8 @@ raw_ostream &operator<<(raw_ostream &OS,
 
 typedef std::map<LineLocation, SampleRecord> BodySampleMap;
 class FunctionSamples;
-typedef std::map<LineLocation, FunctionSamples> CallsiteSampleMap;
+typedef StringMap<FunctionSamples> FunctionSamplesMap;
+typedef std::map<LineLocation, FunctionSamplesMap> CallsiteSampleMap;
 
 /// Representation of the samples collected for a function.
 ///
@@ -252,18 +253,41 @@ public:
   }
 
   /// Return the function samples at the given callsite location.
-  FunctionSamples &functionSamplesAt(const LineLocation &Loc) {
+  FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
     return CallsiteSamples[Loc];
   }
 
-  /// Return a pointer to function samples at the given callsite location.
-  const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc) const {
+  /// Returns the FunctionSamplesMap at the given \p Loc.
+  const FunctionSamplesMap *
+  findFunctionSamplesMapAt(const LineLocation &Loc) const {
     auto iter = CallsiteSamples.find(Loc);
-    if (iter == CallsiteSamples.end()) {
+    if (iter == CallsiteSamples.end())
       return nullptr;
-    } else {
-      return &iter->second;
-    }
+    return &iter->second;
+  }
+
+  /// Returns a pointer to FunctionSamples at the given callsite location \p Loc
+  /// with callee \p CalleeName. If no callsite can be found, relax the
+  /// restriction to return the FunctionSamples at callsite location \p Loc
+  /// with the maximum total sample count.
+  const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc,
+                                               StringRef CalleeName) const {
+    auto iter = CallsiteSamples.find(Loc);
+    if (iter == CallsiteSamples.end())
+      return nullptr;
+    auto FS = iter->second.find(CalleeName);
+    if (FS != iter->second.end())
+      return &FS->getValue();
+    // If we cannot find exact match of the callee name, return the FS with
+    // the max total count.
+    uint64_t MaxTotalSamples = 0;
+    const FunctionSamples *R = nullptr;
+    for (const auto &NameFS : iter->second)
+      if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
+        MaxTotalSamples = NameFS.second.getTotalSamples();
+        R = &NameFS.second;
+      }
+    return R;
   }
 
   bool empty() const { return TotalSamples == 0; }
@@ -297,8 +321,9 @@ public:
     }
     for (const auto &I : Other.getCallsiteSamples()) {
       const LineLocation &Loc = I.first;
-      const FunctionSamples &Rec = I.second;
-      MergeResult(Result, functionSamplesAt(Loc).merge(Rec, Weight));
+      FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
+      for (const auto &Rec : I.second)
+        MergeResult(Result, FSMap[Rec.first()].merge(Rec.second, Weight));
     }
     return Result;
   }
@@ -314,7 +339,8 @@ public:
     if (!F || !F->getSubprogram())
       S.insert(Function::getGUID(Name));
     for (auto CS : CallsiteSamples)
-      CS.second.findImportedFunctions(S, M, Threshold);
+      for (const auto &NameFS : CS.second)
+        NameFS.second.findImportedFunctions(S, M, Threshold);
   }
 
   /// Set the name of the function.

Modified: llvm/trunk/lib/ProfileData/SampleProf.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ProfileData/SampleProf.cpp?rev=300240&r1=300239&r2=300240&view=diff
==============================================================================
--- llvm/trunk/lib/ProfileData/SampleProf.cpp (original)
+++ llvm/trunk/lib/ProfileData/SampleProf.cpp Thu Apr 13 14:52:10 2017
@@ -129,12 +129,14 @@ void FunctionSamples::print(raw_ostream
   OS.indent(Indent);
   if (!CallsiteSamples.empty()) {
     OS << "Samples collected in inlined callsites {\n";
-    SampleSorter<LineLocation, FunctionSamples> SortedCallsiteSamples(
+    SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
         CallsiteSamples);
     for (const auto &CS : SortedCallsiteSamples.get()) {
-      OS.indent(Indent + 2);
-      OS << CS->first << ": inlined callee: " << CS->second.getName() << ": ";
-      CS->second.print(OS, Indent + 4);
+      for (const auto &FS : CS->second) {
+        OS.indent(Indent + 2);
+        OS << CS->first << ": inlined callee: " << FS.second.getName() << ": ";
+        FS.second.print(OS, Indent + 4);
+      }
     }
     OS << "}\n";
   } else {

Modified: llvm/trunk/lib/ProfileData/SampleProfReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ProfileData/SampleProfReader.cpp?rev=300240&r1=300239&r2=300240&view=diff
==============================================================================
--- llvm/trunk/lib/ProfileData/SampleProfReader.cpp (original)
+++ llvm/trunk/lib/ProfileData/SampleProfReader.cpp Thu Apr 13 14:52:10 2017
@@ -211,7 +211,7 @@ std::error_code SampleProfileReaderText:
           InlineStack.pop_back();
         }
         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
-            LineLocation(LineOffset, Discriminator));
+            LineLocation(LineOffset, Discriminator))[FName];
         FSamples.setName(FName);
         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
         InlineStack.push_back(&FSamples);
@@ -363,8 +363,8 @@ SampleProfileReaderBinary::readProfile(F
     if (std::error_code EC = FName.getError())
       return EC;
 
-    FunctionSamples &CalleeProfile =
-        FProfile.functionSamplesAt(LineLocation(*LineOffset, *Discriminator));
+    FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
+        LineLocation(*LineOffset, *Discriminator))[*FName];
     CalleeProfile.setName(*FName);
     if (std::error_code EC = readProfile(CalleeProfile))
       return EC;
@@ -636,7 +636,7 @@ std::error_code SampleProfileReaderGCC::
     uint32_t LineOffset = Offset >> 16;
     uint32_t Discriminator = Offset & 0xffff;
     FProfile = &CallerProfile->functionSamplesAt(
-        LineLocation(LineOffset, Discriminator));
+        LineLocation(LineOffset, Discriminator))[Name];
   }
   FProfile->setName(Name);
 

Modified: llvm/trunk/lib/ProfileData/SampleProfWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ProfileData/SampleProfWriter.cpp?rev=300240&r1=300239&r2=300240&view=diff
==============================================================================
--- llvm/trunk/lib/ProfileData/SampleProfWriter.cpp (original)
+++ llvm/trunk/lib/ProfileData/SampleProfWriter.cpp Thu Apr 13 14:52:10 2017
@@ -68,20 +68,21 @@ std::error_code SampleProfileWriterText:
     OS << "\n";
   }
 
-  SampleSorter<LineLocation, FunctionSamples> SortedCallsiteSamples(
+  SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
       S.getCallsiteSamples());
   Indent += 1;
-  for (const auto &I : SortedCallsiteSamples.get()) {
-    LineLocation Loc = I->first;
-    const FunctionSamples &CalleeSamples = I->second;
-    OS.indent(Indent);
-    if (Loc.Discriminator == 0)
-      OS << Loc.LineOffset << ": ";
-    else
-      OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
-    if (std::error_code EC = write(CalleeSamples))
-      return EC;
-  }
+  for (const auto &I : SortedCallsiteSamples.get())
+    for (const auto &FS : I->second) {
+      LineLocation Loc = I->first;
+      const FunctionSamples &CalleeSamples = FS.second;
+      OS.indent(Indent);
+      if (Loc.Discriminator == 0)
+        OS << Loc.LineOffset << ": ";
+      else
+        OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
+      if (std::error_code EC = write(CalleeSamples))
+        return EC;
+    }
   Indent -= 1;
 
   return sampleprof_error::success;
@@ -109,11 +110,12 @@ void SampleProfileWriterBinary::addNames
   }
 
   // Recursively add all the names for inlined callsites.
-  for (const auto &J : S.getCallsiteSamples()) {
-    const FunctionSamples &CalleeSamples = J.second;
-    addName(CalleeSamples.getName());
-    addNames(CalleeSamples);
-  }
+  for (const auto &J : S.getCallsiteSamples())
+    for (const auto &FS : J.second) {
+      const FunctionSamples &CalleeSamples = FS.second;
+      addName(CalleeSamples.getName());
+      addNames(CalleeSamples);
+    }
 }
 
 std::error_code SampleProfileWriterBinary::writeHeader(
@@ -187,14 +189,15 @@ std::error_code SampleProfileWriterBinar
 
   // Recursively emit all the callsite samples.
   encodeULEB128(S.getCallsiteSamples().size(), OS);
-  for (const auto &J : S.getCallsiteSamples()) {
-    LineLocation Loc = J.first;
-    const FunctionSamples &CalleeSamples = J.second;
-    encodeULEB128(Loc.LineOffset, OS);
-    encodeULEB128(Loc.Discriminator, OS);
-    if (std::error_code EC = writeBody(CalleeSamples))
-      return EC;
-  }
+  for (const auto &J : S.getCallsiteSamples())
+    for (const auto &FS : J.second) {
+      LineLocation Loc = J.first;
+      const FunctionSamples &CalleeSamples = FS.second;
+      encodeULEB128(Loc.LineOffset, OS);
+      encodeULEB128(Loc.Discriminator, OS);
+      if (std::error_code EC = writeBody(CalleeSamples))
+        return EC;
+    }
 
   return sampleprof_error::success;
 }

Modified: llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp?rev=300240&r1=300239&r2=300240&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp Thu Apr 13 14:52:10 2017
@@ -162,6 +162,8 @@ protected:
   ErrorOr<uint64_t> getInstWeight(const Instruction &I);
   ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
   const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
+  std::vector<const FunctionSamples *>
+  findIndirectCallFunctionSamples(const Instruction &I) const;
   const FunctionSamples *findFunctionSamples(const Instruction &I) const;
   bool inlineHotFunctions(Function &F,
                           DenseSet<GlobalValue::GUID> &ImportGUIDs);
@@ -330,11 +332,12 @@ SampleCoverageTracker::countUsedRecords(
   // If there are inlined callsites in this function, count the samples found
   // in the respective bodies. However, do not bother counting callees with 0
   // total samples, these are callees that were never invoked at runtime.
-  for (const auto &I : FS->getCallsiteSamples()) {
-    const FunctionSamples *CalleeSamples = &I.second;
-    if (callsiteIsHot(FS, CalleeSamples))
-      Count += countUsedRecords(CalleeSamples);
-  }
+  for (const auto &I : FS->getCallsiteSamples())
+    for (const auto &J : I.second) {
+      const FunctionSamples *CalleeSamples = &J.second;
+      if (callsiteIsHot(FS, CalleeSamples))
+        Count += countUsedRecords(CalleeSamples);
+    }
 
   return Count;
 }
@@ -347,11 +350,12 @@ SampleCoverageTracker::countBodyRecords(
   unsigned Count = FS->getBodySamples().size();
 
   // Only count records in hot callsites.
-  for (const auto &I : FS->getCallsiteSamples()) {
-    const FunctionSamples *CalleeSamples = &I.second;
-    if (callsiteIsHot(FS, CalleeSamples))
-      Count += countBodyRecords(CalleeSamples);
-  }
+  for (const auto &I : FS->getCallsiteSamples())
+    for (const auto &J : I.second) {
+      const FunctionSamples *CalleeSamples = &J.second;
+      if (callsiteIsHot(FS, CalleeSamples))
+        Count += countBodyRecords(CalleeSamples);
+    }
 
   return Count;
 }
@@ -366,11 +370,12 @@ SampleCoverageTracker::countBodySamples(
     Total += I.second.getSamples();
 
   // Only count samples in hot callsites.
-  for (const auto &I : FS->getCallsiteSamples()) {
-    const FunctionSamples *CalleeSamples = &I.second;
-    if (callsiteIsHot(FS, CalleeSamples))
-      Total += countBodySamples(CalleeSamples);
-  }
+  for (const auto &I : FS->getCallsiteSamples())
+    for (const auto &J : I.second) {
+      const FunctionSamples *CalleeSamples = &J.second;
+      if (callsiteIsHot(FS, CalleeSamples))
+        Total += countBodySamples(CalleeSamples);
+    }
 
   return Total;
 }
@@ -559,12 +564,49 @@ SampleProfileLoader::findCalleeFunctionS
   if (!DIL) {
     return nullptr;
   }
+
+  StringRef CalleeName;
+  if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
+    if (Function *Callee = CI->getCalledFunction())
+      CalleeName = Callee->getName();
+
   const FunctionSamples *FS = findFunctionSamples(Inst);
   if (FS == nullptr)
     return nullptr;
 
   return FS->findFunctionSamplesAt(
-      LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()));
+      LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), CalleeName);
+}
+
+/// Returns a vector of FunctionSamples that are the indirect call targets
+/// of \p Inst. The vector is sorted by the total number of samples.
+std::vector<const FunctionSamples *>
+SampleProfileLoader::findIndirectCallFunctionSamples(
+    const Instruction &Inst) const {
+  const DILocation *DIL = Inst.getDebugLoc();
+  std::vector<const FunctionSamples *> R;
+
+  if (!DIL) {
+    return R;
+  }
+
+  const FunctionSamples *FS = findFunctionSamples(Inst);
+  if (FS == nullptr)
+    return R;
+
+  if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(
+          LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) {
+    if (M->size() == 0)
+      return R;
+    for (const auto &NameFS : *M) {
+      R.push_back(&NameFS.second);
+    }
+    std::sort(R.begin(), R.end(),
+              [](const FunctionSamples *L, const FunctionSamples *R) {
+                return L->getTotalSamples() > R->getTotalSamples();
+              });
+  }
+  return R;
 }
 
 /// \brief Get the FunctionSamples for an instruction.
@@ -578,18 +620,23 @@ SampleProfileLoader::findCalleeFunctionS
 /// \returns the FunctionSamples pointer to the inlined instance.
 const FunctionSamples *
 SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
-  SmallVector<LineLocation, 10> S;
+  SmallVector<std::pair<LineLocation, StringRef>, 10> S;
   const DILocation *DIL = Inst.getDebugLoc();
-  if (!DIL) {
+  if (!DIL)
     return Samples;
+
+  const DILocation *PrevDIL = DIL;
+  for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
+    S.push_back(std::make_pair(
+        LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()),
+        PrevDIL->getScope()->getSubprogram()->getLinkageName()));
+    PrevDIL = DIL;
   }
-  for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt())
-    S.push_back(LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()));
   if (S.size() == 0)
     return Samples;
   const FunctionSamples *FS = Samples;
   for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
-    FS = FS->findFunctionSamplesAt(S[i]);
+    FS = FS->findFunctionSamplesAt(S[i].first, S[i].second);
   }
   return FS;
 }
@@ -638,25 +685,27 @@ bool SampleProfileLoader::inlineHotFunct
       Function *CalledFunction = CallSite(I).getCalledFunction();
       Instruction *DI = I;
       if (!CalledFunction && !PromotedInsns.count(I) &&
-          CallSite(I).isIndirectCall()) {
-        auto CalleeFunctionName = findCalleeFunctionSamples(*I)->getName();
-        const char *Reason = "Callee function not available";
-        CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
-        if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
-          // The indirect target was promoted and inlined in the profile, as a
-          // result, we do not have profile info for the branch probability.
-          // We set the probability to 80% taken to indicate that the static
-          // call is likely taken.
-          DI = dyn_cast<Instruction>(
-              promoteIndirectCall(I, CalledFunction, 80, 100, false)
-                  ->stripPointerCasts());
-          PromotedInsns.insert(I);
-        } else {
-          DEBUG(dbgs() << "\nFailed to promote indirect call to "
-                       << CalleeFunctionName << " because " << Reason << "\n");
-          continue;
+          CallSite(I).isIndirectCall())
+        for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
+          auto CalleeFunctionName = FS->getName();
+          const char *Reason = "Callee function not available";
+          CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
+          if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
+            // The indirect target was promoted and inlined in the profile, as a
+            // result, we do not have profile info for the branch probability.
+            // We set the probability to 80% taken to indicate that the static
+            // call is likely taken.
+            DI = dyn_cast<Instruction>(
+                promoteIndirectCall(I, CalledFunction, 80, 100, false)
+                    ->stripPointerCasts());
+            PromotedInsns.insert(I);
+          } else {
+            DEBUG(dbgs() << "\nFailed to promote indirect call to "
+                         << CalleeFunctionName << " because " << Reason
+                         << "\n");
+            continue;
+          }
         }
-      }
       if (!CalledFunction || !CalledFunction->getSubprogram()) {
         findCalleeFunctionSamples(*I)->findImportedFunctions(
             ImportGUIDs, F.getParent(),

Modified: llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.prof?rev=300240&r1=300239&r2=300240&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.prof (original)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.prof Thu Apr 13 14:52:10 2017
@@ -1,8 +1,10 @@
 test:63067:0
  4: 3345 _Z3barv:1398 _Z3foov:2059
 test_inline:3000:0
- 5: foo_inline:3000
+ 5: foo_inline1:3000
   1: 3000
+ 5: foo_inline2:4000
+  1: 4000
 test_noinline:3000:0
  5: foo_noinline:3000
   1: 3000

Modified: llvm/trunk/test/Transforms/SampleProfile/indirect-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/indirect-call.ll?rev=300240&r1=300239&r2=300240&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/indirect-call.ll (original)
+++ llvm/trunk/test/Transforms/SampleProfile/indirect-call.ll Thu Apr 13 14:52:10 2017
@@ -16,10 +16,14 @@ define void @test_inline(i64* (i32*)*, i
   %2 = alloca i64* (i32*)*
   store i64* (i32*)* %0, i64* (i32*)** %2
   %3 = load i64* (i32*)*, i64* (i32*)** %2
-; CHECK: icmp {{.*}} @foo_inline
+; CHECK: icmp {{.*}} @foo_inline2
 ; CHECK: if.true.direct_targ:
 ; CHECK-NOT: call
 ; CHECK: if.false.orig_indirect:
+; CHECK: icmp {{.*}} @foo_inline1
+; CHECK: if.true.direct_targ1:
+; CHECK-NOT: call
+; CHECK: if.false.orig_indirect2:
 ; CHECK: call
   call i64* %3(i32* %x), !dbg !5
   ret void
@@ -39,7 +43,11 @@ define void @test_noinline(void ()*) !db
 
 @x = global i32 0, align 4
 
-define i32* @foo_inline(i32* %x) !dbg !3 {
+define i32* @foo_inline1(i32* %x) !dbg !3 {
+  ret i32* %x
+}
+
+define i32* @foo_inline2(i32* %x) !dbg !3 {
   ret i32* %x
 }
 




More information about the llvm-commits mailing list