[llvm] r275707 - [ThinLTO] Perform profile-guided indirect call promotion

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 17 07:47:02 PDT 2016


Author: tejohnson
Date: Sun Jul 17 09:47:01 2016
New Revision: 275707

URL: http://llvm.org/viewvc/llvm-project?rev=275707&view=rev
Log:
[ThinLTO] Perform profile-guided indirect call promotion

Summary:
To enable profile-guided indirect call promotion in ThinLTO mode, we
simply add call graph edges for each profitable target from the profile
to the summaries; the summary-guided importing will then consider the
callee for importing as usual.

Also we need to enable the indirect call promotion pass creation in the
PassManagerBuilder when PerformThinLTO=true (we are in the ThinLTO
backend), so that the newly imported functions are considered for
promotion in the backends.

The IC promotion profiles refer to callees by GUID, which required
adding GUIDs to the per-module VST in bitcode (and assigning them
valueIds similar to how they are assigned valueIds in the combined
index).

Reviewers: mehdi_amini, xur

Subscribers: mehdi_amini, davidxl, llvm-commits

Differential Revision: http://reviews.llvm.org/D21932

Added:
    llvm/trunk/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll
    llvm/trunk/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll
Modified:
    llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
    llvm/trunk/lib/Analysis/ModuleSummaryAnalysis.cpp
    llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp
    llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp

Modified: llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h?rev=275707&r1=275706&r2=275707&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h (original)
+++ llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h Sun Jul 17 09:47:01 2016
@@ -80,6 +80,7 @@ struct ValueInfo {
     assert(Kind == VI_Value && "Not a Value type");
     return TheValue.V;
   }
+  bool isGUID() const { return Kind == VI_GUID; }
 };
 
 /// \brief Function and variable summary information to aid decisions and
@@ -261,6 +262,14 @@ public:
     CallGraphEdgeList.push_back(std::make_pair(CalleeGUID, Info));
   }
 
+  /// Record a call graph edge from this function to each function GUID recorded
+  /// in \p CallGraphEdges.
+  void
+  addCallGraphEdges(DenseMap<GlobalValue::GUID, CalleeInfo> &CallGraphEdges) {
+    for (auto &EI : CallGraphEdges)
+      addCallGraphEdge(EI.first, EI.second);
+  }
+
   /// Record a call graph edge from this function to the function identified
   /// by \p CalleeV, with \p CalleeInfo including the cumulative profile
   /// count (across all calls from this function) or 0 if no PGO.

Modified: llvm/trunk/lib/Analysis/ModuleSummaryAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ModuleSummaryAnalysis.cpp?rev=275707&r1=275706&r2=275707&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ModuleSummaryAnalysis.cpp (original)
+++ llvm/trunk/lib/Analysis/ModuleSummaryAnalysis.cpp Sun Jul 17 09:47:01 2016
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Dominators.h"
@@ -73,7 +74,9 @@ void ModuleSummaryIndexBuilder::computeF
   // Map from callee ValueId to profile count. Used to accumulate profile
   // counts for all static calls to a given callee.
   DenseMap<const Value *, CalleeInfo> CallGraphEdges;
+  DenseMap<GlobalValue::GUID, CalleeInfo> IndirectCallEdges;
   DenseSet<const Value *> RefEdges;
+  ICallPromotionAnalysis ICallAnalysis;
 
   SmallPtrSet<const User *, 8> Visited;
   for (const BasicBlock &BB : F)
@@ -83,13 +86,29 @@ void ModuleSummaryIndexBuilder::computeF
 
       if (auto CS = ImmutableCallSite(&I)) {
         auto *CalledFunction = CS.getCalledFunction();
-        if (CalledFunction && CalledFunction->hasName() &&
-            !CalledFunction->isIntrinsic()) {
-          auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
-          auto *CalleeId =
-              M->getValueSymbolTable().lookup(CalledFunction->getName());
-          CallGraphEdges[CalleeId] +=
-              (ScaledCount ? ScaledCount.getValue() : 0);
+        // Check if this is a direct call to a known function.
+        if (CalledFunction) {
+          if (CalledFunction->hasName() && !CalledFunction->isIntrinsic()) {
+            auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
+            auto *CalleeId =
+                M->getValueSymbolTable().lookup(CalledFunction->getName());
+            CallGraphEdges[CalleeId] +=
+                (ScaledCount ? ScaledCount.getValue() : 0);
+          }
+        } else {
+          // Otherwise, check for an indirect call (call to a non-const value
+          // that isn't an inline assembly call).
+          const CallInst *CI = dyn_cast<CallInst>(&I);
+          if (CS.getCalledValue() && !isa<Constant>(CS.getCalledValue()) &&
+              !(CI && CI->isInlineAsm())) {
+            uint32_t NumVals, NumCandidates;
+            uint64_t TotalCount;
+            auto CandidateProfileData =
+                ICallAnalysis.getPromotionCandidatesForInstruction(
+                    &I, NumVals, TotalCount, NumCandidates);
+            for (auto &Candidate : CandidateProfileData)
+              IndirectCallEdges[Candidate.Value] += Candidate.Count;
+          }
         }
       }
       findRefEdges(&I, RefEdges, Visited);
@@ -99,6 +118,7 @@ void ModuleSummaryIndexBuilder::computeF
   std::unique_ptr<FunctionSummary> FuncSummary =
       llvm::make_unique<FunctionSummary>(Flags, NumInsts);
   FuncSummary->addCallGraphEdges(CallGraphEdges);
+  FuncSummary->addCallGraphEdges(IndirectCallEdges);
   FuncSummary->addRefEdges(RefEdges);
   Index->addGlobalValueSummary(F.getName(), std::move(FuncSummary));
 }

Modified: llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp?rev=275707&r1=275706&r2=275707&view=diff
==============================================================================
--- llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp (original)
+++ llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp Sun Jul 17 09:47:01 2016
@@ -119,6 +119,14 @@ class ModuleBitcodeWriter : public Bitco
   /// The start bit of the module block, for use in generating a module hash
   uint64_t BitcodeStartBit = 0;
 
+  /// Map that holds the correspondence between GUIDs in the summary index,
+  /// that came from indirect call profiles, and a value id generated by this
+  /// class to use in the VST and summary block records.
+  std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap;
+
+  /// Tracks the last value id recorded in the GUIDToValueIdMap.
+  unsigned GlobalValueId;
+
 public:
   /// Constructs a ModuleBitcodeWriter object for the given Module,
   /// writing to the provided \p Buffer.
@@ -132,6 +140,25 @@ public:
     // will start at the bitcode, and we need the offset of the VST
     // to line up.
     BitcodeStartBit = Stream.GetCurrentBitNo();
+
+    // Assign ValueIds to any callee values in the index that came from
+    // indirect call profiles and were recorded as a GUID not a Value*
+    // (which would have been assigned an ID by the ValueEnumerator).
+    // The starting ValueId is just after the number of values in the
+    // ValueEnumerator, so that they can be emitted in the VST.
+    GlobalValueId = VE.getValues().size();
+    if (Index)
+      for (const auto &GUIDSummaryLists : *Index)
+        // Examine all summaries for this GUID.
+        for (auto &Summary : GUIDSummaryLists.second)
+          if (auto FS = dyn_cast<FunctionSummary>(Summary.get()))
+            // For each call in the function summary, see if the call
+            // is to a GUID (which means it is for an indirect call,
+            // otherwise we would have a Value for it). If so, synthesize
+            // a value id.
+            for (auto &CallEdge : FS->calls())
+              if (CallEdge.first.isGUID())
+                assignValueId(CallEdge.first.getGUID());
   }
 
 private:
@@ -260,6 +287,22 @@ private:
                                   unsigned FSModRefsAbbrev);
   void writePerModuleGlobalValueSummary();
   void writeModuleHash(size_t BlockStartPos);
+
+  void assignValueId(GlobalValue::GUID ValGUID) {
+    GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
+  }
+  unsigned getValueId(GlobalValue::GUID ValGUID) {
+    const auto &VMI = GUIDToValueIdMap.find(ValGUID);
+    assert(VMI != GUIDToValueIdMap.end());
+    return VMI->second;
+  }
+  // Helper to get the valueId for the type of value recorded in VI.
+  unsigned getValueId(ValueInfo VI) {
+    if (VI.isGUID())
+      return getValueId(VI.getGUID());
+    return VE.getValueID(VI.getValue());
+  }
+  std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }
 };
 
 /// Class to manage the bitcode writing for a combined index.
@@ -2707,6 +2750,7 @@ void ModuleBitcodeWriter::writeValueSymb
   unsigned FnEntry8BitAbbrev;
   unsigned FnEntry7BitAbbrev;
   unsigned FnEntry6BitAbbrev;
+  unsigned GUIDEntryAbbrev;
   if (IsModuleLevel && hasVSTOffsetPlaceholder()) {
     // 8-bit fixed-width VST_CODE_FNENTRY function strings.
     BitCodeAbbrev *Abbv = new BitCodeAbbrev();
@@ -2734,11 +2778,19 @@ void ModuleBitcodeWriter::writeValueSymb
     Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
     Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
     FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
+
+    // FIXME: Change the name of this record as it is now used by
+    // the per-module index as well.
+    Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
+    GUIDEntryAbbrev = Stream.EmitAbbrev(Abbv);
   }
 
   // FIXME: Set up the abbrev, we know how many values there are!
   // FIXME: We know if the type names can use 7-bit ascii.
-  SmallVector<unsigned, 64> NameVals;
+  SmallVector<uint64_t, 64> NameVals;
 
   for (const ValueName &Name : VST) {
     // Figure out the encoding to use for the name.
@@ -2799,6 +2851,16 @@ void ModuleBitcodeWriter::writeValueSymb
     Stream.EmitRecord(Code, NameVals, AbbrevToUse);
     NameVals.clear();
   }
+  // Emit any GUID valueIDs created for indirect call edges into the
+  // module-level VST.
+  if (IsModuleLevel && hasVSTOffsetPlaceholder())
+    for (const auto &GI : valueIds()) {
+      NameVals.push_back(GI.second);
+      NameVals.push_back(GI.first);
+      Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals,
+                        GUIDEntryAbbrev);
+      NameVals.clear();
+    }
   Stream.ExitBlock();
 }
 
@@ -3220,12 +3282,11 @@ void ModuleBitcodeWriter::writePerModule
   std::sort(Calls.begin(), Calls.end(),
             [this](const FunctionSummary::EdgeTy &L,
                    const FunctionSummary::EdgeTy &R) {
-              return VE.getValueID(L.first.getValue()) <
-                     VE.getValueID(R.first.getValue());
+              return getValueId(L.first) < getValueId(R.first);
             });
   bool HasProfileData = F.getEntryCount().hasValue();
   for (auto &ECI : Calls) {
-    NameVals.push_back(VE.getValueID(ECI.first.getValue()));
+    NameVals.push_back(getValueId(ECI.first));
     assert(ECI.second.CallsiteCount > 0 && "Expected at least one callsite");
     NameVals.push_back(ECI.second.CallsiteCount);
     if (HasProfileData)

Modified: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=275707&r1=275706&r2=275707&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp Sun Jul 17 09:47:01 2016
@@ -407,10 +407,11 @@ void PassManagerBuilder::populateModuleP
     /// PGO instrumentation is added during the compile phase for ThinLTO, do
     /// not run it a second time
     addPGOInstrPasses(MPM);
-    // Indirect call promotion that promotes intra-module targets only.
-    MPM.add(createPGOIndirectCallPromotionLegacyPass());
   }
 
+  // Indirect call promotion that promotes intra-module targets only.
+  MPM.add(createPGOIndirectCallPromotionLegacyPass());
+
   if (EnableNonLTOGlobalsModRef)
     // We add a module alias analysis pass here. In part due to bugs in the
     // analysis infrastructure this "works" in that the analysis stays alive

Added: llvm/trunk/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll?rev=275707&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll (added)
+++ llvm/trunk/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll Sun Jul 17 09:47:01 2016
@@ -0,0 +1,7 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @a() {
+entry:
+  ret void
+}

Added: llvm/trunk/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll?rev=275707&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll (added)
+++ llvm/trunk/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll Sun Jul 17 09:47:01 2016
@@ -0,0 +1,32 @@
+; Do setup work for all tests below: generate bitcode and combined index
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/thinlto_indirect_call_promotion.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -o %t4.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS
+; IMPORTS: Import a
+
+; RUN: opt %t4.bc -pgo-icall-prom -S -icp-count-threshold=1 | FileCheck %s --check-prefix=ICALL-PROM
+; RUN: opt %t4.bc -pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=1 2>&1 | FileCheck %s --check-prefix=PASS-REMARK
+; PASS-REMARK: Promote indirect call to a with count 1 out of 1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = external local_unnamed_addr global void ()*, align 8
+
+define i32 @main() local_unnamed_addr {
+entry:
+  %0 = load void ()*, void ()** @foo, align 8
+; ICALL-PROM:   br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
+  tail call void %0(), !prof !1
+  ret i32 0
+}
+
+!1 = !{!"VP", i32 0, i64 1, i64 -6289574019528802036, i64 1}
+
+; Should not have a VP annotation on new indirect call (check before and after
+; branch_weights annotation).
+; ICALL-PROM-NOT: !"VP"
+; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0}
+; ICALL-PROM-NOT: !"VP"




More information about the llvm-commits mailing list