[llvm] 14756b7 - [SampleFDO] Don't mix up the existing indirect call value profile with the new

Wei Mi via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 18 09:55:01 PDT 2021


Author: Wei Mi
Date: 2021-03-18T09:54:34-07:00
New Revision: 14756b70eeba76c0adeb73b82c4e69b35b74cdbe

URL: https://github.com/llvm/llvm-project/commit/14756b70eeba76c0adeb73b82c4e69b35b74cdbe
DIFF: https://github.com/llvm/llvm-project/commit/14756b70eeba76c0adeb73b82c4e69b35b74cdbe.diff

LOG: [SampleFDO] Don't mix up the existing indirect call value profile with the new
value profile annotated after inlining.

In https://reviews.llvm.org/D96806 and https://reviews.llvm.org/D97350, we
use the magic number -1 in the value profile to avoid repeated indirect call
promotion to the same target for an indirect call. Function updateIDTMetaData
is used to mark an target as being promoted in the value profile with the
magic number. updateIDTMetaData is also used to update the value profile
when an indirect call is inlined and new inline instance profile should be
applied. For the second case, currently updateIDTMetaData mixes up the
existing value profile of the indirect call with the new profile, leading
to the problematic senario that a target count is larger than the total count
in the value profile.

The patch fixes the problem. When updateIDTMetaData is used to update the
value profile after inlining, all the values in the existing value profile
will be dropped except the values with the magic number counts.

Differential Revision: https://reviews.llvm.org/D98835

Added: 
    llvm/test/Transforms/SampleProfile/Inputs/norepeated-icp-3.prof
    llvm/test/Transforms/SampleProfile/norepeated-icp-3.ll

Modified: 
    llvm/lib/Transforms/IPO/SampleProfile.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 2ecff87f492f..561165aea9b8 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -755,14 +755,8 @@ static void
 updateIDTMetaData(Instruction &Inst,
                   const SmallVectorImpl<InstrProfValueData> &CallTargets,
                   uint64_t Sum) {
-  assert((Sum != 0 || (CallTargets.size() == 1 &&
-                       CallTargets[0].Count == NOMORE_ICP_MAGICNUM)) &&
-         "If sum is 0, assume only one element in CallTargets with count "
-         "being NOMORE_ICP_MAGICNUM");
-
   uint32_t NumVals = 0;
   // OldSum is the existing total count in the value profile data.
-  // It will be replaced by Sum if Sum is not 0.
   uint64_t OldSum = 0;
   std::unique_ptr<InstrProfValueData[]> ValueData =
       std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
@@ -771,34 +765,44 @@ updateIDTMetaData(Instruction &Inst,
                                ValueData.get(), NumVals, OldSum, true);
 
   DenseMap<uint64_t, uint64_t> ValueCountMap;
-  // Initialize ValueCountMap with existing value profile data.
-  if (Valid) {
-    for (uint32_t I = 0; I < NumVals; I++)
-      ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
-  }
-
-  for (const auto &Data : CallTargets) {
-    auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
-    if (Pair.second)
-      continue;
-    // Whenever the count is NOMORE_ICP_MAGICNUM for a value, keep it
-    // in the ValueCountMap. If both the count in CallTargets and the
-    // count in ValueCountMap is not NOMORE_ICP_MAGICNUM, keep the
-    // count in CallTargets.
-    if (Pair.first->second != NOMORE_ICP_MAGICNUM &&
-        Data.Count == NOMORE_ICP_MAGICNUM) {
+  if (Sum == 0) {
+    assert((CallTargets.size() == 1 &&
+            CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
+           "If sum is 0, assume only one element in CallTargets "
+           "with count being NOMORE_ICP_MAGICNUM");
+    // Initialize ValueCountMap with existing value profile data.
+    if (Valid) {
+      for (uint32_t I = 0; I < NumVals; I++)
+        ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
+    }
+    auto Pair =
+        ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
+    // If the target already exists in value profile, decrease the total
+    // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
+    if (!Pair.second) {
       OldSum -= Pair.first->second;
       Pair.first->second = NOMORE_ICP_MAGICNUM;
-    } else if (Pair.first->second == NOMORE_ICP_MAGICNUM &&
-               Data.Count != NOMORE_ICP_MAGICNUM) {
+    }
+    Sum = OldSum;
+  } else {
+    // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
+    // counts in the value profile.
+    if (Valid) {
+      for (uint32_t I = 0; I < NumVals; I++) {
+        if (ValueData[I].Count == NOMORE_ICP_MAGICNUM)
+          ValueCountMap[ValueData[I].Value] = ValueData[I].Count;
+      }
+    }
+
+    for (const auto &Data : CallTargets) {
+      auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
+      if (Pair.second)
+        continue;
+      // The target represented by Data.Value has already been promoted.
+      // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
+      // Sum by Data.Count.
       assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
       Sum -= Data.Count;
-    } else if (Pair.first->second != NOMORE_ICP_MAGICNUM &&
-               Data.Count != NOMORE_ICP_MAGICNUM) {
-      // Sum will be used in this case. Although the existing count
-      // for the current value in value profile will be overriden,
-      // no need to update OldSum.
-      Pair.first->second = Data.Count;
     }
   }
 
@@ -818,8 +822,7 @@ updateIDTMetaData(Instruction &Inst,
   uint32_t MaxMDCount =
       std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
   annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
-                    NewCallTargets, Sum ? Sum : OldSum, IPVK_IndirectCallTarget,
-                    MaxMDCount);
+                    NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
 }
 
 /// Attempt to promote indirect call and also inline the promoted call.

diff  --git a/llvm/test/Transforms/SampleProfile/Inputs/norepeated-icp-3.prof b/llvm/test/Transforms/SampleProfile/Inputs/norepeated-icp-3.prof
new file mode 100644
index 000000000000..a65c792bf070
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/norepeated-icp-3.prof
@@ -0,0 +1,6 @@
+_Z3foov:225715:1
+ 2: 5553
+ 3: 5391
+ 1: _Z3goov:5860
+  1: 5279 _Z3hoov:5860 _Z3moov:210
+  2: 5279

diff  --git a/llvm/test/Transforms/SampleProfile/norepeated-icp-3.ll b/llvm/test/Transforms/SampleProfile/norepeated-icp-3.ll
new file mode 100644
index 000000000000..140a15f58747
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/norepeated-icp-3.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -passes=sample-profile -sample-profile-icp-max-prom=4 -sample-profile-file=%S/Inputs/norepeated-icp-3.prof -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at .str = private unnamed_addr constant [5 x i8] c"hoo\0A\00", align 1
+ at p = dso_local global void ()* null, align 8
+ at str = private unnamed_addr constant [4 x i8] c"hoo\00", align 1
+
+; Function Attrs: nofree nounwind
+declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) #1
+
+; Function Attrs: uwtable mustprogress
+define dso_local void @_Z3goov() #0 !dbg !11 {
+entry:
+  %0 = load void ()*, void ()** @p, align 8, !dbg !12, !tbaa !13
+  call void %0(), !dbg !17, !prof !22
+  ret void, !dbg !18
+}
+
+; After the indirect call in _Z3goov is inlined into _Z3foov, it will be
+; annotated with new inline instance profile. The existing value profile
+; associated with the indirect call should be dropped except those values
+; wth NOMORE_ICP_MAGICNUM magic number indicating promoted targets.
+; CHECK-LABEL: @_Z3foov(
+; CHECK: call void %0(), {{.*}} !prof ![[PROF_ID:[0-9]+]]
+; CHECK-NEXT: ret void
+
+; Function Attrs: uwtable mustprogress
+define dso_local void @_Z3foov() #0 !dbg !19 {
+entry:
+  call void @_Z3goov(), !dbg !20
+  ret void, !dbg !21
+}
+
+; Function Attrs: nofree nounwind
+declare noundef i32 @puts(i8* nocapture noundef readonly) #2
+
+attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" }
+attributes #1 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nofree nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "1.cc", directory: "")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!""}
+!8 = !DISubroutineType(types: !2)
+!11 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!12 = !DILocation(line: 7, column: 5, scope: !11)
+!13 = !{!14, !14, i64 0}
+!14 = !{!"any pointer", !15, i64 0}
+!15 = !{!"omnipotent char", !16, i64 0}
+!16 = !{!"Simple C++ TBAA"}
+!17 = !DILocation(line: 7, column: 3, scope: !11)
+!18 = !DILocation(line: 8, column: 1, scope: !11)
+!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!20 = !DILocation(line: 11, column: 3, scope: !19)
+!21 = !DILocation(line: 12, column: 3, scope: !19)
+; The original value 125292384912345234234 and its count 8000 should
+; be dropped when the indirect call is annotated with new profile.
+; The original value -7383239051784516332 and its count -1 should be kept
+; because -1 is NOMORE_ICP_MAGICNUM.
+; CHECK: ![[PROF_ID]] = !{!"VP", i32 0, i64 5860, i64 -7383239051784516332, i64 -1, i64 -7701940972712279918, i64 5860}
+!22 = !{!"VP", i32 0, i64 8000, i64 -7383239051784516332, i64 -1, i64 125292384912345234234, i64 8000}


        


More information about the llvm-commits mailing list