[llvm] f28ee1a - [CSSPGO] Update pseudo probe distribution factor based on inline context.

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Sun May 16 23:11:50 PDT 2021


Author: Hongtao Yu
Date: 2021-05-16T23:11:36-07:00
New Revision: f28ee1a2b386ad7fa7489cad55840e78849b4abf

URL: https://github.com/llvm/llvm-project/commit/f28ee1a2b386ad7fa7489cad55840e78849b4abf
DIFF: https://github.com/llvm/llvm-project/commit/f28ee1a2b386ad7fa7489cad55840e78849b4abf.diff

LOG: [CSSPGO] Update pseudo probe distribution factor based on inline context.

With prelink inlining, pseudo probes with the same ID can come from different inline contexts. Such probes should not share samples, and their factors should be fixed up separately.

I'm seeing 0.3% speedup for SPEC2017 overall. Benchmark 631.deepsjeng_s benefits the most, about 4%.

Reviewed By: wenlei, wmi

Differential Revision: https://reviews.llvm.org/D102429

Added: 
    llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll

Modified: 
    llvm/include/llvm/IR/PseudoProbe.h
    llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
    llvm/lib/Transforms/IPO/SampleProfileProbe.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h
index 58d83a6e2b54e..a4148f09d7649 100644
--- a/llvm/include/llvm/IR/PseudoProbe.h
+++ b/llvm/include/llvm/IR/PseudoProbe.h
@@ -80,6 +80,9 @@ struct PseudoProbe {
   uint32_t Id;
   uint32_t Type;
   uint32_t Attr;
+  // Distribution factor that estimates the portion of the real execution count.
+  // A saturated distribution factor stands for 1.0 or 100%. A pseudo probe has
+  // a factor with the value ranged from 0.0 to 1.0.
   float Factor;
 
   bool isDangling() const {

diff  --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index 0fd79d8ff7f39..43f4bc78140fc 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -33,7 +33,10 @@ class Module;
 using namespace sampleprof;
 using BlockIdMap = std::unordered_map<BasicBlock *, uint32_t>;
 using InstructionIdMap = std::unordered_map<Instruction *, uint32_t>;
-using ProbeFactorMap = std::unordered_map<uint64_t, float>;
+// Map from tuples of Probe id and inline stack hash code to distribution
+// factors.
+using ProbeFactorMap = std::unordered_map<std::pair<uint64_t, uint64_t>, float,
+                                          pair_hash<uint64_t, uint64_t>>;
 using FuncProbeFactorMap = StringMap<ProbeFactorMap>;
 
 enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid };
@@ -135,6 +138,18 @@ class SampleProfileProbePass : public PassInfoMixin<SampleProfileProbePass> {
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
+// Pseudo probe distribution factor updater.
+// Sample profile annotation can happen in both LTO prelink and postlink. The
+// postlink-time re-annotation can degrade profile quality because of prelink
+// code duplication transformation, such as loop unrolling, jump threading,
+// indirect call promotion etc. As such, samples corresponding to a source
+// location may be aggregated multiple times in postlink. With a concept of
+// distribution factor for pseudo probes, samples can be distributed among
+// duplicated probes reasonably based on the assumption that optimizations
+// duplicating code well-maintain the branch frequency information (BFI). This
+// pass updates distribution factors for each pseudo probe at the end of the
+// prelink pipeline, to reflect an estimated portion of the real execution
+// count.
 class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> {
   void runOnFunction(Function &F, FunctionAnalysisManager &FAM);
 

diff  --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 8c17a98ab091d..6c3752d986b8f 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -50,6 +50,27 @@ static cl::opt<bool>
     UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
                       cl::desc("Update pseudo probe distribution factor"));
 
+static uint64_t getCallStackHash(const DILocation *DIL) {
+  uint64_t Hash = 0;
+  const DILocation *InlinedAt = DIL ? DIL->getInlinedAt() : nullptr;
+  while (InlinedAt) {
+    Hash ^= MD5Hash(std::to_string(InlinedAt->getLine()));
+    Hash ^= MD5Hash(std::to_string(InlinedAt->getColumn()));
+    const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
+    // Use linkage name for C++ if possible.
+    auto Name = SP->getLinkageName();
+    if (Name.empty())
+      Name = SP->getName();
+    Hash ^= MD5Hash(Name);
+    InlinedAt = InlinedAt->getInlinedAt();
+  }
+  return Hash;
+}
+
+static uint64_t computeCallStackHash(const Instruction &Inst) {
+  return getCallStackHash(Inst.getDebugLoc());
+}
+
 bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
   // Skip function declaration.
   if (F->isDeclaration())
@@ -117,8 +138,10 @@ void PseudoProbeVerifier::runAfterPass(const Loop *L) {
 void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
                                               ProbeFactorMap &ProbeFactors) {
   for (const auto &I : *Block) {
-    if (Optional<PseudoProbe> Probe = extractProbe(I))
-      ProbeFactors[Probe->Id] += Probe->Factor;
+    if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+      uint64_t Hash = computeCallStackHash(I);
+      ProbeFactors[{Probe->Id, Hash}] += Probe->Factor;
+    }
   }
 }
 
@@ -136,7 +159,7 @@ void PseudoProbeVerifier::verifyProbeFactors(
           dbgs() << "Function " << F->getName() << ":\n";
           BannerPrinted = true;
         }
-        dbgs() << "Probe " << I.first << "\tprevious factor "
+        dbgs() << "Probe " << I.first.first << "\tprevious factor "
                << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
                << format("%0.2f", CurProbeFactor) << "\n";
       }
@@ -407,8 +430,10 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F,
         // execution count of the probe. The original samples of the probe will
         // be distributed among the rest probes if there are any, this is
         // less-than-deal but at least we don't lose any samples.
-        if (!Probe->isDangling())
-          ProbeFactors[Probe->Id] += BBProfileCount(&Block);
+        if (!Probe->isDangling()) {
+          uint64_t Hash = computeCallStackHash(I);
+          ProbeFactors[{Probe->Id, Hash}] += BBProfileCount(&Block);
+        }
       }
     }
   }
@@ -420,7 +445,8 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F,
         // Ignore danling probes since they are logically deleted and should do
         // not consume any profile samples in the subsequent profile annotation.
         if (!Probe->isDangling()) {
-          float Sum = ProbeFactors[Probe->Id];
+          uint64_t Hash = computeCallStackHash(I);
+          float Sum = ProbeFactors[{Probe->Id, Hash}];
           if (Sum != 0)
             setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
         }

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll
new file mode 100644
index 0000000000000..19e83649723d6
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll
@@ -0,0 +1,80 @@
+; RUN: opt < %s -passes='pseudo-probe-update' -S  | FileCheck %s
+
+declare i32 @f1()
+
+declare i32 @f2()
+
+declare void @f3()
+
+define i32 @foo(i1 %cond, i1 %cond2) !dbg !4 !prof !10 {
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1)
+  br i1 %cond, label %T1, label %Merge, !prof !11
+
+T1:                                               ; preds = %0
+  %v1 = call i32 @f1(), !prof !12
+  %cond3 = icmp eq i32 %v1, 412
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1)
+;; The distribution factor -8513881372706734080 stands for 53.85%, which is from 7/(6+7).
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !13
+;; Probe 7 has two copies. Since they don't share the same inline context, they are
+;; not considered to share samples, so their distribution factors are not fixed up.
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 7, i32 0, i64 -1)
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 7, i32 0, i64 -1), !dbg !13
+;; Similar to Probe 7, one copy of Probe 8 doesn't have inline context.
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 8, i32 0, i64 -1)
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 8, i32 0, i64 -1), !dbg !13
+  br i1 %cond3, label %T2, label %F2, !prof !11
+
+Merge:                                            ; preds = %0
+  %v2 = call i32 @f2(), !prof !12
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1)
+;; The distribution factor 8513881922462547968 stands for 46.15%, which is from 6/(6+7).
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 8513881922462547968)
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 8513881922462547968), !dbg !13
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 7, i32 0, i64 -1)
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 7, i32 0, i64 -1), !dbg !18
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 8, i32 0, i64 -1)
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 8, i32 0, i64 -1), !dbg !19 
+  br i1 %cond2, label %T2, label %F2, !prof !11
+
+T2:                                               ; preds = %Merge, %T1
+  %B1 = phi i32 [ %v1, %T1 ], [ %v2, %Merge ]
+  call void @f3(), !prof !12
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 5, i32 0, i64 -1)
+  ret i32 %B1
+
+F2:                                               ; preds = %Merge, %T1
+  %B2 = phi i32 [ %v1, %T1 ], [ %v2, %Merge ]
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1)
+  ret i32 %B2
+}
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
+
+attributes #0 = { inaccessiblememonly nounwind willreturn }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.pseudo_probe_desc = !{!2, !3}
+
+!0 = !{i32 7, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = !{i64 6699318081062747564, i64 281479271677951, !"foo", null}
+!3 = !{i64 6468398850841090686, i64 138828622701, !"zen", null}
+!4 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 9, type: !6, scopeLine: 9, spFlags: DISPFlagDefinition, unit: !9)
+!5 = !DIFile(filename: "test.cpp", directory: "test")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8, !8}
+!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
+!10 = !{!"function_entry_count", i64 14}
+!11 = !{!"branch_weights", i32 8, i32 7}
+!12 = !{!"branch_weights", i32 7}
+!13 = !DILocation(line: 39, column: 9, scope: !14, inlinedAt: !16)
+!14 = distinct !DILexicalBlock(scope: !15, file: !5, line: 39, column: 7)
+!15 = distinct !DISubprogram(name: "zen", scope: !5, file: !5, line: 37, type: !6, scopeLine: 38, spFlags: DISPFlagDefinition, unit: !9)
+!16 = distinct !DILocation(line: 10, column: 11, scope: !17)
+!17 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646551)
+!18 = !DILocation(line: 53, column: 3, scope: !15, inlinedAt: !19)
+!19 = !DILocation(line: 12, column: 3, scope: !4)
\ No newline at end of file


        


More information about the llvm-commits mailing list